com.thinkbiganalytics.policy.standardization.DateTimeStandardizer.java Source code

Java tutorial

Introduction

Here is the source code for com.thinkbiganalytics.policy.standardization.DateTimeStandardizer.java

Source

package com.thinkbiganalytics.policy.standardization;

/*-
 * #%L
 * thinkbig-field-policy-default
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.policy.PolicyProperty;
import com.thinkbiganalytics.policy.PolicyPropertyRef;
import com.thinkbiganalytics.policy.PolicyPropertyTypes;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.TimeZone;

/**
 * Standardization policy that re-formats a date/time string into one of the fixed output patterns
 * listed in {@link OutputFormats} (the annotation below describes these as "Hive-friendly").
 *
 * <p>When an input pattern is configured the incoming value is parsed with that pattern. When no
 * input pattern is configured the value is interpreted as a number: exactly ten digits are treated
 * as Unix seconds, anything else as epoch milliseconds.
 *
 * <p>Values that cannot be converted are returned unchanged; the policy never throws for bad data.
 */
@Standardizer(name = "Date/Time", description = "Converts any date to Hive-friendly format with optional timezone conversion")
public class DateTimeStandardizer implements StandardizationPolicy {

    private static final Logger log = LoggerFactory.getLogger(DateTimeStandardizer.class);

    /**
     * Pattern used to parse incoming values; blank or null means the input is an epoch number
     */
    @PolicyProperty(name = "Date Format", hint = "Format Example: MM/dd/YYYY.  If converting from Unix timestamp leave empty.")
    private String inputDateFormat;

    /**
     * Which of the fixed output patterns to emit
     */
    @PolicyProperty(name = "Output Format", hint = "Choose an output format", type = PolicyPropertyTypes.PROPERTY_TYPE.select, selectableValues = {
            "DATE_ONLY", "DATETIME", "DATETIME_NOMILLIS" }, required = true)
    private OutputFormats outputFormat = OutputFormats.DATE_ONLY;

    /**
     * Timezone applied to the input formatter (optional; empty leaves the formatter's zone untouched)
     */
    // NOTE(review): this property is named "Input timezone" while the constructor's
    // @PolicyPropertyRef uses "Input Timezone" - confirm the resolver matches names case-insensitively.
    @PolicyProperty(name = "Input timezone", hint = "Input timezone (optional)", type = PolicyPropertyTypes.PROPERTY_TYPE.select, selectableValues = {
            "", "ACT", "AET", "AGT", "ART", "AST", "BET", "BST", "CAT", "CNT", "CST", "CTT", "EAT", "ECT", "IET",
            "IST", "JST", "MIT", "NET", "NST", "PLT", "PNT", "PRT", "PST", "SST", "UTC", "VST", "EST", "MST",
            "HST" }, value = "")
    private String inputTimezone;

    /**
     * Timezone applied to the output formatter (optional; empty leaves the formatter's zone untouched)
     */
    // NOTE(review): same name-case difference as the input timezone ("Output timezone" here,
    // "Output Timezone" on the constructor parameter) - confirm against the resolver.
    @PolicyProperty(name = "Output timezone", hint = "Targeted timezone (optional)", type = PolicyPropertyTypes.PROPERTY_TYPE.select, selectableValues = {
            "", "ACT", "AET", "AGT", "ART", "AST", "BET", "BST", "CAT", "CNT", "CST", "CTT", "EAT", "ECT", "IET",
            "IST", "JST", "MIT", "NET", "NST", "PLT", "PNT", "PRT", "PST", "SST", "UTC", "VST", "EST", "MST",
            "HST" }, value = "")
    private String outputTimezone;

    // The formatters are transient and rebuilt by initializeFormatters() (constructor and readObject).
    private transient DateTimeFormatter outputFormatter;
    private transient DateTimeFormatter inputFormatter;

    /**
     * True only when the last call to {@link #initializeFormatters()} completed without error
     */
    private boolean valid;

    /**
     * Creates a standardizer for epoch-number input with no timezone configuration.
     *
     * @param outputFormat the output pattern to emit, must not be null
     */
    public DateTimeStandardizer(OutputFormats outputFormat) {
        this(null, outputFormat, null, null);
    }

    /**
     * Creates a standardizer with no timezone configuration.
     *
     * @param inputDateFormat the pattern used to parse input, or null/blank for epoch-number input
     * @param outputFormat    the output pattern to emit, must not be null
     */
    public DateTimeStandardizer(String inputDateFormat, OutputFormats outputFormat) {
        this(inputDateFormat, outputFormat, null, null);
    }

    /**
     * Creates a fully configured standardizer and builds its formatters.
     *
     * @param inputDateFormat the pattern used to parse input, or null/blank for epoch-number input
     * @param outputFormat    the output pattern to emit, must not be null
     * @param inputTimezone   timezone id for the input formatter, or null/empty for none
     * @param outputTimezone  timezone id for the output formatter, or null/empty for none
     */
    public DateTimeStandardizer(@PolicyPropertyRef(name = "Date Format") String inputDateFormat,
            @PolicyPropertyRef(name = "Output Format") OutputFormats outputFormat,
            @PolicyPropertyRef(name = "Input Timezone") String inputTimezone,
            @PolicyPropertyRef(name = "Output Timezone") String outputTimezone) {

        Validate.notNull(outputFormat);
        this.inputDateFormat = inputDateFormat;
        this.outputFormat = outputFormat;
        this.inputTimezone = inputTimezone;
        this.outputTimezone = outputTimezone;
        initializeFormatters();
    }

    /**
     * Unix timestamps are in seconds, not milliseconds.  A purely numeric string of exactly ten
     * characters is taken to be seconds.
     */
    private boolean isInputUnixTimestamp(String value) {
        return StringUtils.isNotBlank(value) && StringUtils.isNumeric(value) && value.length() == 10;
    }

    /**
     * Formats a numeric epoch string with the output formatter.
     *
     * @param value the numeric string, either ten-digit Unix seconds or epoch milliseconds
     * @return the formatted date
     * @throws NumberFormatException if the value is not a valid long
     */
    private String convertEpoch(String value) {
        long epochMillis = Long.parseLong(value);
        if (isInputUnixTimestamp(value)) {
            // unix timestamps are in seconds; the formatter expects milliseconds
            epochMillis *= 1000;
        }
        return outputFormatter.print(epochMillis);
    }

    /**
     * Converts the supplied value to the configured output format.
     *
     * @param value the raw date/time string, or an epoch number when no input pattern is configured
     * @return the re-formatted value, or the original value when the policy is misconfigured, the
     * value is null, or the value cannot be parsed
     */
    @Override
    public String convertValue(String value) {
        // Nothing to do for an unusable configuration; also avoid handing null to the parser,
        // where it would fail with something other than the IllegalArgumentException handled below.
        if (!valid || value == null) {
            return value;
        }
        try {
            if (inputFormatter == null) {
                return convertEpoch(value);
            }
            DateTime parsed = inputFormatter.parseDateTime(value);
            return outputFormatter.print(parsed);
        } catch (IllegalArgumentException e) {
            // NumberFormatException is an IllegalArgumentException, so the epoch path lands here too
            log.debug("Failed to convert string [{}] to date pattern [{}]", value, inputDateFormat);
        }
        return value;
    }

    /**
     * Returns a time formatter for the specified timezone
     *
     * @param format   the current formatter
     * @param timezone the timezone string; null or empty returns {@code format} unchanged
     * @return a time formatter for the specified timezone
     */
    protected DateTimeFormatter formatterForTimezone(DateTimeFormatter format, String timezone) {

        if (StringUtils.isEmpty(timezone)) {
            return format;
        }
        if ("UTC".equals(timezone)) {
            return format.withZoneUTC();
        }
        // Note: java.util.TimeZone.getTimeZone falls back to GMT for ids it does not understand.
        return format.withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone(timezone)));

    }

    /**
     * Builds the output formatter (and the input formatter when an input pattern is configured),
     * applying the configured timezones.  Sets {@code valid} to true on success; on an
     * {@link IllegalArgumentException} the problem is logged and {@code valid} stays false so that
     * {@link #convertValue(String)} passes values through untouched.
     */
    protected void initializeFormatters() {
        try {
            valid = false;
            if (outputFormat == null) {
                outputFormat = OutputFormats.DATE_ONLY;
            }
            switch (outputFormat) {
            case DATE_ONLY:
                this.outputFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
                break;
            case DATETIME:
                this.outputFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS");
                break;

            case DATETIME_NOMILLIS:
                this.outputFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
                break;
            }
            this.outputFormatter = formatterForTimezone(this.outputFormatter, outputTimezone);
            if (StringUtils.isNotBlank(inputDateFormat)) {
                this.inputFormatter = DateTimeFormat.forPattern(this.inputDateFormat);
                this.inputFormatter = formatterForTimezone(this.inputFormatter, inputTimezone);
            }
            valid = true;
        } catch (IllegalArgumentException e) {
            // The exception is passed as the trailing argument so the cause is not lost.
            log.warn(
                    "Illegal configuration input format [{}], tz [{}] Output format  [{}], tz [{}]"
                            + ". Standardizer will be skipped.",
                    inputDateFormat, inputTimezone, outputFormat, outputTimezone, e);
        }
    }

    /**
     * Deserialization hook: restores the non-transient fields, then rebuilds the transient formatters.
     */
    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        initializeFormatters();
    }

    /**
     * @return the configured input pattern, possibly null or blank
     */
    public String getInputDateFormat() {
        return inputDateFormat;
    }

    /**
     * @return the configured output format
     */
    public OutputFormats getOutputFormat() {
        return outputFormat;
    }

    /**
     * Supported output patterns: {@code yyyy-MM-dd}, {@code yyyy-MM-dd HH:mm:ss.SSS} and
     * {@code yyyy-MM-dd HH:mm:ss} respectively.
     */
    public enum OutputFormats {
        DATE_ONLY, DATETIME, DATETIME_NOMILLIS
    }

    /**
     * @param value the candidate raw value
     * @return true when the value is a {@link String}, the only type this policy converts
     */
    public Boolean accepts(Object value) {
        return (value instanceof String);
    }

    /**
     * Converts a raw value when it is accepted by this policy.
     *
     * @param value the raw value
     * @return the converted string for accepted values, otherwise the value unchanged
     */
    public Object convertRawValue(Object value) {
        if (accepts(value)) {
            return String.valueOf(convertValue(value.toString()));
        }

        return value;
    }
}