org.apache.hadoop.hive.ql.parse.DruidSqlOperatorConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.ql.parse.DruidSqlOperatorConverter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import org.apache.calcite.adapter.druid.DirectOperatorConversion;
import org.apache.calcite.adapter.druid.DruidExpressions;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.adapter.druid.ExtractOperatorConversion;
import org.apache.calcite.adapter.druid.FloorOperatorConversion;
import org.apache.calcite.adapter.druid.UnarySuffixOperatorConversion;
import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveDateAddSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveDateSubSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFromUnixTimeSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTruncSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnixTimestampSqlOperator;
import org.joda.time.Period;

import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

/**
 * Contains custom Druid SQL operator converter classes. Each converter is either:
 * Hive-specific OperatorConversion logic that cannot be part of Calcite, or
 * a temporary OperatorConversion that has not been released by Calcite yet.
 */
public class DruidSqlOperatorConverter {

    /** Date-only pattern passed to {@code timestamp_format} by the TRUNC conversion in this class. */
    private static final String YYYY_MM_DD = "yyyy-MM-dd";
    /** Timestamp pattern used by the unix-time conversions in this class when the call carries no format operand. */
    public static final String DEFAULT_TS_FORMAT = "yyyy-MM-dd HH:mm:ss";

    /** Not instantiable: this class only exposes static members and nested converter classes. */
    private DruidSqlOperatorConverter() {
    }

    /**
     * Lazily created cache of the operator-to-converter mapping returned by {@link #getDefaultMap()}.
     * Declared {@code volatile} and only ever assigned a fully populated map, so a reader can never
     * observe a partially filled instance.
     */
    private static volatile Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>
            druidOperatorMap = null;

    /**
     * Returns the mapping from Calcite operators to Druid operator converters: the Calcite defaults
     * from {@link DruidQuery#DEFAULT_OPERATORS_LIST}, overridden and extended with the Hive specific
     * converters registered below.
     *
     * <p>The map is built once on first use and the same instance is returned on every call.
     *
     * @return the shared operator-to-converter map
     */
    public static final Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> getDefaultMap() {
        Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> result = druidOperatorMap;
        if (result == null) {
            synchronized (DruidSqlOperatorConverter.class) {
                result = druidOperatorMap;
                if (result == null) {
                    // Populate a local map first; publishing the field before the map is complete
                    // would let concurrent callers read a half-initialized HashMap.
                    result = buildDefaultMap();
                    druidOperatorMap = result;
                }
            }
        }
        return result;
    }

    /** Builds a fresh, fully populated operator-to-converter map. */
    private static Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> buildDefaultMap() {
        final Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> map = new HashMap<>();
        DruidQuery.DEFAULT_OPERATORS_LIST.forEach(op -> map.put(op.calciteOperator(), op));

        //Override Hive specific operators
        map.putAll(Maps.asMap(HiveFloorDate.ALL_FUNCTIONS,
                (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new FloorOperatorConversion()));
        map.putAll(Maps.asMap(HiveExtractDate.ALL_FUNCTIONS,
                (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new ExtractOperatorConversion()));
        map.put(HiveConcat.INSTANCE, new DirectOperatorConversion(HiveConcat.INSTANCE, "concat"));
        map.put(SqlStdOperatorTable.SUBSTRING,
                new DruidSqlOperatorConverter.DruidSubstringOperatorConversion());
        map.put(SqlStdOperatorTable.IS_NULL,
                new UnaryFunctionOperatorConversion(SqlStdOperatorTable.IS_NULL, "isnull"));
        map.put(SqlStdOperatorTable.IS_NOT_NULL,
                new UnaryFunctionOperatorConversion(SqlStdOperatorTable.IS_NOT_NULL, "notnull"));
        map.put(HiveTruncSqlOperator.INSTANCE, new DruidDateTruncOperatorConversion());
        map.put(HiveToDateSqlOperator.INSTANCE, new DruidToDateOperatorConversion());
        map.put(HiveFromUnixTimeSqlOperator.INSTANCE, new DruidFormUnixTimeOperatorConversion());
        map.put(HiveUnixTimestampSqlOperator.INSTANCE, new DruidUnixTimestampOperatorConversion());
        map.put(HiveDateAddSqlOperator.INSTANCE,
                new DruidDateArithmeticOperatorConversion(1, HiveDateAddSqlOperator.INSTANCE));
        map.put(HiveDateSubSqlOperator.INSTANCE,
                new DruidDateArithmeticOperatorConversion(-1, HiveDateSubSqlOperator.INSTANCE));
        return map;
    }

    /**
     * Converts a Hive SUBSTRING call into a Druid {@code substring} expression.
     * This is a temporary fix that can be removed once we move to a Calcite version including the following.
     * https://issues.apache.org/jira/browse/CALCITE-2226
     */
    public static class DruidSubstringOperatorConversion
            extends org.apache.calcite.adapter.druid.SubstringOperatorConversion {

        /**
         * Builds {@code substring(input, start, length)}.
         *
         * @return the Druid expression, or {@code null} when any non-literal operand has no Druid expression
         */
        @Nullable
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query) {
            final List<RexNode> operands = ((RexCall) rexNode).getOperands();

            final String input = DruidExpressions.toDruidExpression(operands.get(0), rowType, query);
            if (input == null) {
                return null;
            }

            // SQL is 1-indexed, Druid is 0-indexed, hence the "- 1" applied to the start position.
            final RexNode startOperand = operands.get(1);
            final String zeroBasedStart;
            if (startOperand.isA(SqlKind.LITERAL)) {
                zeroBasedStart = DruidExpressions.numberLiteral(RexLiteral.intValue(startOperand) - 1);
            } else {
                final String startExpression = DruidExpressions.toDruidExpression(startOperand, rowType, query);
                if (startExpression == null) {
                    return null;
                }
                zeroBasedStart = DruidQuery.format("(%s - 1)", startExpression);
            }

            final String lengthExpression;
            if (operands.size() <= 2) {
                // No length operand: -1 is emitted as the length for "substring from index to the end".
                lengthExpression = DruidExpressions.numberLiteral(-1);
            } else {
                final RexNode lengthOperand = operands.get(2);
                if (lengthOperand.isA(SqlKind.LITERAL)) {
                    lengthExpression = DruidExpressions.numberLiteral(RexLiteral.intValue(lengthOperand));
                } else {
                    lengthExpression = DruidExpressions.toDruidExpression(lengthOperand, rowType, query);
                    if (lengthExpression == null) {
                        return null;
                    }
                }
            }

            return DruidQuery.format("substring(%s, %s, %s)", input, zeroBasedStart, lengthExpression);
        }
    }

    /**
     * Operator conversion from the Hive TRUNC UDF to Druid date time functions
     * ({@code timestamp_floor} wrapped in {@code timestamp_format}).
     */
    public static class DruidDateTruncOperatorConversion
            implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

        @Override
        public SqlOperator calciteOperator() {
            return HiveTruncSqlOperator.INSTANCE;
        }

        /**
         * Translates {@code trunc(date)} and {@code trunc(date, unit)}.
         *
         * @return the Druid expression, or {@code null} when the call cannot be translated
         *         (operand without Druid expression, non datetime single operand, unknown unit,
         *         or more than two operands)
         * @throws IllegalStateException if the call has no operand
         */
        @Nullable
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query) {
            final List<RexNode> operands = ((RexCall) rexNode).getOperands();
            final int operandCount = operands.size();
            if (operandCount < 1) {
                throw new IllegalStateException(
                        "trunc() requires at least 1 argument, got " + operandCount);
            }
            if (operandCount > 2) {
                return null;
            }

            final RexNode dateOperand = operands.get(0);
            final String dateExpression = DruidExpressions.toDruidExpression(dateOperand, rowType, query);
            if (dateExpression == null) {
                return null;
            }

            final String period;
            if (operandCount == 1) {
                // Without an explicit unit only datetime operands are handled; they are floored to the day.
                if (!SqlTypeUtil.isDatetime(dateOperand.getType())) {
                    return null;
                }
                period = Period.days(1).toString();
            } else {
                final String granularity = DruidExpressions.toDruidExpression(operands.get(1), rowType, query);
                if (granularity == null) {
                    return null;
                }
                period = periodFor(granularity);
                if (period == null) {
                    // Bail out, the unit can not be inferred.
                    return null;
                }
            }

            final TimeZone zone = timezoneId(query, dateOperand);
            final String floored = DruidExpressions.applyTimestampFloor(dateExpression, period, "", zone);
            return applyTimestampFormat(floored, YYYY_MM_DD, zone);
        }

        /**
         * Maps a quoted unit expression such as {@code 'MONTH'} to the matching period string.
         *
         * @param granularity non-null Druid expression of the unit operand
         * @return the period string, or {@code null} when the unit is not recognized
         */
        @Nullable
        private static String periodFor(String granularity) {
            switch (granularity) {
            case "'MONTH'":
            case "'MON'":
            case "'MM'":
                return Period.months(1).toString();
            case "'YEAR'":
            case "'YYYY'":
            case "'YY'":
                return Period.years(1).toString();
            case "'QUARTER'":
            case "'Q'":
                return Period.months(3).toString();
            default:
                return null;
            }
        }
    }

    /**
     * Expression operator conversion from the Hive TO_DATE operator to a Druid day-level timestamp floor.
     */
    public static class DruidToDateOperatorConversion
            implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

        @Override
        public SqlOperator calciteOperator() {
            return HiveToDateSqlOperator.INSTANCE;
        }

        /**
         * Floors the single operand to a one day period.
         *
         * @return the Druid expression, or {@code null} when the operand has no Druid expression
         * @throws IllegalStateException if the call does not have exactly one operand
         */
        @Nullable
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query) {
            final List<RexNode> operands = ((RexCall) rexNode).getOperands();
            if (operands.size() != 1) {
                throw new IllegalStateException("to_date() requires 1 argument, got " + operands.size());
            }
            final RexNode operand = operands.get(0);
            final String expression = DruidExpressions.toDruidExpression(operand, rowType, query);
            return expression == null
                    ? null
                    : DruidExpressions.applyTimestampFloor(expression, Period.days(1).toString(), "",
                            timezoneId(query, operand));
        }
    }

    /**
     * Operator conversion from the Hive UNIX_TIMESTAMP operator to a Druid expression.
     * Datetime operands are divided by 1000; any other operand is handed to Druid's
     * {@code unix_timestamp} function together with a format.
     */
    public static class DruidUnixTimestampOperatorConversion
            implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

        @Override
        public SqlOperator calciteOperator() {
            return HiveUnixTimestampSqlOperator.INSTANCE;
        }

        /**
         * Translates {@code unix_timestamp(value)} and {@code unix_timestamp(value, format)}.
         *
         * @return the Druid expression, or {@code null} when the call has no operand or one of the
         *         operands has no Druid expression
         */
        @Nullable
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query) {
            final RexCall call = (RexCall) rexNode;
            final List<RexNode> operands = call.getOperands();
            if (operands.isEmpty()) {
                // Nothing to translate; bail out instead of failing on operands.get(0).
                return null;
            }
            final String arg0 = DruidExpressions.toDruidExpression(operands.get(0), rowType, query);
            if (arg0 == null) {
                return null;
            }
            if (SqlTypeUtil.isDatetime(operands.get(0).getType())) {
                // The datetime expression is divided by 1000 to obtain the unix timestamp value.
                return DruidExpressions.functionCall("div",
                        ImmutableList.of(arg0, DruidExpressions.numberLiteral(1000)));
            }
            // dealing with String type
            final String format;
            if (operands.size() == 2) {
                // The converted operand is already a Druid expression (a literal comes back quoted),
                // so it must not be wrapped in stringLiteral() a second time.
                format = DruidExpressions.toDruidExpression(operands.get(1), rowType, query);
                if (format == null) {
                    return null;
                }
            } else {
                format = DruidExpressions.stringLiteral(DEFAULT_TS_FORMAT);
            }
            return DruidExpressions.functionCall("unix_timestamp", ImmutableList.of(arg0, format));
        }
    }

    /**
     * Operator conversion from the Hive FROM_UNIXTIME operator to Druid's {@code timestamp_format}.
     * The first operand is multiplied by 1000 and formatted in the UTC time zone.
     */
    public static class DruidFormUnixTimeOperatorConversion
            implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

        @Override
        public SqlOperator calciteOperator() {
            return HiveFromUnixTimeSqlOperator.INSTANCE;
        }

        /**
         * Translates {@code from_unixtime(value)} and {@code from_unixtime(value, format)}.
         *
         * @return the Druid expression, or {@code null} when one of the operands has no Druid expression
         * @throws IllegalStateException if the call does not have one or two operands
         */
        @Nullable
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query) {
            final RexCall call = (RexCall) rexNode;
            final List<RexNode> operands = call.getOperands();
            if (operands.size() < 1 || operands.size() > 2) {
                throw new IllegalStateException(
                        "from_unixtime() requires 1 or 2 arguments, got " + operands.size());
            }
            final String arg = DruidExpressions.toDruidExpression(operands.get(0), rowType, query);
            if (arg == null) {
                return null;
            }

            // Emit the multiplier as a numeric literal (as the unix_timestamp conversion does for its
            // divisor) rather than as the quoted string '1000'.
            final String numMillis = DruidQuery.format("(%s * %s)", arg, DruidExpressions.numberLiteral(1000));
            final String format = operands.size() == 1
                    ? DruidExpressions.stringLiteral(DEFAULT_TS_FORMAT)
                    : DruidExpressions.toDruidExpression(operands.get(1), rowType, query);
            if (format == null) {
                // ImmutableList.of rejects null elements; bail out like for any other
                // operand that has no Druid expression.
                return null;
            }
            return DruidExpressions.functionCall("timestamp_format", ImmutableList.of(numMillis, format,
                    DruidExpressions.stringLiteral(TimeZone.getTimeZone("UTC").getID())));
        }
    }

    /**
     * Shared conversion for the Hive date add / date sub operators, expressed as a Druid
     * {@code timestamp_shift} by a one day period.
     */
    public static class DruidDateArithmeticOperatorConversion
            implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

        private final int direction;
        private final SqlOperator operator;

        /**
         * @param direction {@code 1} to shift forward, {@code -1} to shift backward
         * @param operator Calcite operator this conversion is registered for
         * @throws IllegalArgumentException if direction is neither 1 nor -1
         */
        public DruidDateArithmeticOperatorConversion(int direction, SqlOperator operator) {
            Preconditions.checkArgument(direction == 1 || direction == -1);
            this.operator = operator;
            this.direction = direction;
        }

        @Override
        public SqlOperator calciteOperator() {
            return operator;
        }

        /**
         * Builds {@code timestamp_shift(date, 'P1D', steps, zone)}; the step count is negated
         * when the direction is -1.
         *
         * @return the Druid expression, or {@code null} when an operand has no Druid expression
         * @throws IllegalStateException if the call does not have exactly two operands
         */
        @Nullable
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query) {
            final List<RexNode> operands = ((RexCall) rexNode).getOperands();
            if (operands.size() != 2) {
                throw new IllegalStateException(
                        "date_add/date_sub() requires 2 arguments, got " + operands.size());
            }
            final RexNode dateOperand = operands.get(0);
            final String dateExpression = DruidExpressions.toDruidExpression(dateOperand, rowType, query);
            final String dayCount = DruidExpressions.toDruidExpression(operands.get(1), rowType, query);
            if (dateExpression == null || dayCount == null) {
                return null;
            }

            final String steps;
            if (direction == -1) {
                steps = DruidQuery.format("-( %s )", dayCount);
            } else {
                steps = dayCount;
            }
            final String period = DruidExpressions.stringLiteral("P1D");
            final String zoneId = DruidExpressions.stringLiteral(timezoneId(query, dateOperand).getID());
            return DruidExpressions.functionCall("timestamp_shift",
                    ImmutableList.of(dateExpression, period, steps, zoneId));
        }
    }

    /**
     * Picks the time zone to use for an operand: the zone configured in the planner's
     * {@link CalciteConnectionConfig} when the operand is a timestamp with local time zone,
     * UTC for every other type.
     *
     * @param query Druid Rel whose planner context holds the connection config
     * @param arg operand whose SQL type decides which zone applies
     * @return time zone
     */
    private static TimeZone timezoneId(final DruidQuery query, final RexNode arg) {
        if (arg.getType().getSqlTypeName() != SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE) {
            return TimeZone.getTimeZone("UTC");
        }
        final CalciteConnectionConfig connectionConfig = query.getTopNode().getCluster().getPlanner()
                .getContext().unwrap(CalciteConnectionConfig.class);
        return TimeZone.getTimeZone(connectionConfig.timeZone());
    }

    /**
     * Wraps an expression in a Druid {@code timestamp_format} call.
     *
     * @param arg Druid expression to format
     * @param format pattern, emitted as a string literal
     * @param timeZone zone whose id is emitted as a string literal
     * @return the {@code timestamp_format(...)} expression
     */
    private static String applyTimestampFormat(String arg, String format, TimeZone timeZone) {
        final String formatLiteral = DruidExpressions.stringLiteral(format);
        final String zoneLiteral = DruidExpressions.stringLiteral(timeZone.getID());
        return DruidExpressions.functionCall("timestamp_format",
                ImmutableList.of(arg, formatLiteral, zoneLiteral));
    }

    /**
     * Converts a single-operand Calcite operator into a Druid function call of the form
     * {@code name(operand)}.
     */
    public static class UnaryFunctionOperatorConversion
            implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

        private final SqlOperator operator;
        private final String druidOperator;

        /**
         * @param operator Calcite operator this conversion is registered for
         * @param druidOperator name of the Druid function to emit
         */
        public UnaryFunctionOperatorConversion(SqlOperator operator, String druidOperator) {
            this.operator = operator;
            this.druidOperator = druidOperator;
        }

        @Override
        public SqlOperator calciteOperator() {
            return operator;
        }

        /**
         * @return the Druid function call, or {@code null} when the operands have no Druid expression
         */
        @Override
        public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery druidQuery) {
            final List<RexNode> operands = ((RexCall) rexNode).getOperands();
            final List<String> operandExpressions =
                    DruidExpressions.toDruidExpressions(druidQuery, rowType, operands);
            if (operandExpressions != null) {
                // getOnlyElement enforces that exactly one operand expression is present.
                final String onlyOperand = Iterables.getOnlyElement(operandExpressions);
                return DruidQuery.format("%s(%s)", druidOperator, onlyOperand);
            }
            return null;
        }
    }
}