org.apache.impala.analysis.ToSqlUtils.java Source code

Introduction

Here is the source code for org.apache.impala.analysis.ToSqlUtils.java, a utility class from the Apache Impala frontend that builds SQL strings compatible with both Impala and Hive: quoted identifiers, "CREATE TABLE", "CREATE VIEW", and "CREATE FUNCTION" statements, and end-of-line plan-hint comments.

Source

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.impala.analysis;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.parse.HiveLexer;

import org.apache.impala.catalog.CatalogException;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.Function;
import org.apache.impala.catalog.HBaseTable;
import org.apache.impala.catalog.HdfsCompression;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.catalog.KuduColumn;
import org.apache.impala.catalog.KuduTable;
import org.apache.impala.catalog.RowFormat;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.View;
import org.apache.impala.util.KuduUtil;

/**
 * Contains utility methods for creating SQL strings, for example,
 * for creating identifier strings that are compatible with Hive or Impala.
 */
public class ToSqlUtils {
    // Table properties to hide when generating the toSql() statement
    // EXTERNAL and comment are hidden because they are part of the toSql result, e.g.,
    // "CREATE EXTERNAL TABLE <name> ... COMMENT <comment> ..."
    private static final ImmutableSet<String> HIDDEN_TABLE_PROPERTIES = ImmutableSet.of("EXTERNAL", "comment");

    /**
     * Given an unquoted identifier string, returns an identifier lexable by
     * Impala and Hive, possibly by enclosing the original identifier in "`" quotes.
     * For example, Hive cannot parse its own auto-generated column
     * names "_c0", "_c1" etc. unless they are quoted. Impala and Hive keywords
     * must also be quoted.
     *
     * Impala's lexer recognizes a superset of the unquoted identifiers that Hive's
     * lexer can. At the same time, Impala's and Hive's lists of keywords differ.
     * This method always returns an identifier that Impala and Hive can recognize,
     * although for some identifiers the quotes may not be strictly necessary for
     * one or the other system.
     */
    public static String getIdentSql(String ident) {
        boolean hiveNeedsQuotes = true;
        HiveLexer hiveLexer = new HiveLexer(new ANTLRStringStream(ident));
        try {
            Token t = hiveLexer.nextToken();
            // Check that the lexer recognizes an identifier and then EOF.
            boolean identFound = t.getType() == HiveLexer.Identifier;
            t = hiveLexer.nextToken();
            // Hive needs no enclosing quotes if the lexer saw exactly one
            // identifier followed by EOF.
            hiveNeedsQuotes = !(identFound && t.getType() == HiveLexer.EOF);
        } catch (Exception e) {
            // Ignore exception and just quote the identifier to be safe.
        }
        boolean isImpalaKeyword = SqlScanner.isKeyword(ident.toUpperCase());
        // Impala's scanner recognizes the ".123" portion of "db.123_tbl" as a decimal,
        // so while the quoting is not necessary for the given identifier itself, the quotes
        // are needed if this identifier will be preceded by a ".".
        boolean startsWithNumber = false;
        if (!hiveNeedsQuotes && !isImpalaKeyword) {
            startsWithNumber = Character.isDigit(ident.charAt(0));
        }
        if (hiveNeedsQuotes || isImpalaKeyword || startsWithNumber)
            return "`" + ident + "`";
        return ident;
    }

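    /**
     * Returns a new list in which each identifier in identList has been quoted
     * via getIdentSql() as necessary.
     */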
    public static List<String> getIdentSqlList(List<String> identList) {
        List<String> identSqlList = Lists.newArrayList();
        for (String ident : identList) {
            identSqlList.add(getIdentSql(ident));
        }
        return identSqlList;
    }

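    /**
     * Returns the given path as dot-separated SQL, quoting each path component
     * via getIdentSql() as necessary.
     */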
    public static String getPathSql(List<String> path) {
        StringBuilder result = new StringBuilder();
        for (String p : path) {
            if (result.length() > 0)
                result.append(".");
            result.append(getIdentSql(p));
        }
        return result.toString();
    }

    /**
     * Returns the "CREATE TABLE" SQL string corresponding to the given
     * CreateTableStmt.
     */
    public static String getCreateTableSql(CreateTableStmt stmt) {
        ArrayList<String> colsSql = Lists.newArrayList();
        for (ColumnDef col : stmt.getColumnDefs()) {
            colsSql.add(col.toString());
        }
        ArrayList<String> partitionColsSql = Lists.newArrayList();
        for (ColumnDef col : stmt.getPartitionColumnDefs()) {
            partitionColsSql.add(col.toString());
        }
        // TODO: Pass the correct compression, if applicable.
        return getCreateTableSql(stmt.getDb(), stmt.getTbl(), stmt.getComment(), colsSql, partitionColsSql,
                stmt.getTblPrimaryKeyColumnNames(), null, stmt.getTblProperties(), stmt.getSerdeProperties(),
                stmt.isExternal(), stmt.getIfNotExists(), stmt.getRowFormat(),
                HdfsFileFormat.fromThrift(stmt.getFileFormat()), HdfsCompression.NONE, null, stmt.getLocation());
    }

    /**
     * Returns the "CREATE TABLE" SQL string corresponding to the given
     * CreateTableAsSelectStmt.
     */
    public static String getCreateTableSql(CreateTableAsSelectStmt stmt) {
        CreateTableStmt innerStmt = stmt.getCreateStmt();
        // Only add partition column labels to output. Table columns must not be specified as
        // they are deduced from the select statement.
        ArrayList<String> partitionColsSql = Lists.newArrayList();
        for (ColumnDef col : innerStmt.getPartitionColumnDefs()) {
            partitionColsSql.add(col.getColName());
        }
        // TODO: Pass the correct compression, if applicable.
        String createTableSql = getCreateTableSql(innerStmt.getDb(), innerStmt.getTbl(), innerStmt.getComment(),
                null, partitionColsSql, innerStmt.getTblPrimaryKeyColumnNames(), null, innerStmt.getTblProperties(),
                innerStmt.getSerdeProperties(), innerStmt.isExternal(), innerStmt.getIfNotExists(),
                innerStmt.getRowFormat(), HdfsFileFormat.fromThrift(innerStmt.getFileFormat()),
                HdfsCompression.NONE, null, innerStmt.getLocation());
        return createTableSql + " AS " + stmt.getQueryStmt().toSql();
    }

    /**
     * Returns a "CREATE TABLE" or "CREATE VIEW" statement that creates the specified
     * table.
     */
    public static String getCreateTableSql(Table table) throws CatalogException {
        Preconditions.checkNotNull(table);
        if (table instanceof View)
            return getCreateViewSql((View) table);
        org.apache.hadoop.hive.metastore.api.Table msTable = table.getMetaStoreTable();
        HashMap<String, String> properties = Maps.newHashMap(msTable.getParameters());
        // Map.remove() is a no-op if the key is absent.
        properties.remove("transient_lastDdlTime");
        boolean isExternal = msTable.getTableType() != null
                && msTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString());
        String comment = properties.get("comment");
        for (String hiddenProperty : HIDDEN_TABLE_PROPERTIES) {
            properties.remove(hiddenProperty);
        }
        ArrayList<String> colsSql = Lists.newArrayList();
        ArrayList<String> partitionColsSql = Lists.newArrayList();
        boolean isHbaseTable = table instanceof HBaseTable;
        for (int i = 0; i < table.getColumns().size(); i++) {
            if (!isHbaseTable && i < table.getNumClusteringCols()) {
                partitionColsSql.add(columnToSql(table.getColumns().get(i)));
            } else {
                colsSql.add(columnToSql(table.getColumns().get(i)));
            }
        }
        RowFormat rowFormat = RowFormat.fromStorageDescriptor(msTable.getSd());
        HdfsFileFormat format = HdfsFileFormat.fromHdfsInputFormatClass(msTable.getSd().getInputFormat());
        HdfsCompression compression = HdfsCompression.fromHdfsInputFormatClass(msTable.getSd().getInputFormat());
        String location = isHbaseTable ? null : msTable.getSd().getLocation();
        Map<String, String> serdeParameters = msTable.getSd().getSerdeInfo().getParameters();

        String storageHandlerClassName = table.getStorageHandlerClassName();
        List<String> primaryKeySql = Lists.newArrayList();
        String kuduPartitionByParams = null;
        if (table instanceof KuduTable) {
            KuduTable kuduTable = (KuduTable) table;
            // Kudu tables don't use LOCATION syntax
            location = null;
            format = HdfsFileFormat.KUDU;
            // Kudu tables cannot use the Hive DDL syntax for the storage handler
            storageHandlerClassName = null;
            properties.remove(KuduTable.KEY_STORAGE_HANDLER);
            String kuduTableName = properties.get(KuduTable.KEY_TABLE_NAME);
            Preconditions.checkNotNull(kuduTableName);
            if (kuduTableName
                    .equals(KuduUtil.getDefaultCreateKuduTableName(table.getDb().getName(), table.getName()))) {
                properties.remove(KuduTable.KEY_TABLE_NAME);
            }
            // Internal property, should not be exposed to the user.
            properties.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);

            if (!isExternal) {
                primaryKeySql.addAll(kuduTable.getPrimaryKeyColumnNames());

                List<String> paramsSql = Lists.newArrayList();
                for (KuduPartitionParam param : kuduTable.getPartitionBy()) {
                    paramsSql.add(param.toSql());
                }
                kuduPartitionByParams = Joiner.on(", ").join(paramsSql);
            } else {
                // We shouldn't output the columns for external tables
                colsSql = null;
            }
        }
        HdfsUri tableLocation = location == null ? null : new HdfsUri(location);
        return getCreateTableSql(table.getDb().getName(), table.getName(), comment, colsSql, partitionColsSql,
                primaryKeySql, kuduPartitionByParams, properties, serdeParameters, isExternal, false, rowFormat,
                format, compression, storageHandlerClassName, tableLocation);
    }

    /**
     * Returns a "CREATE TABLE" string that creates the table with the specified properties.
     * The tableName must not be null. If columnsSql is null, the schema syntax will
     * not be generated.
     */
    public static String getCreateTableSql(String dbName, String tableName, String tableComment,
            List<String> columnsSql, List<String> partitionColumnsSql, List<String> primaryKeysSql,
            String kuduPartitionByParams, Map<String, String> tblProperties, Map<String, String> serdeParameters,
            boolean isExternal, boolean ifNotExists, RowFormat rowFormat, HdfsFileFormat fileFormat,
            HdfsCompression compression, String storageHandlerClass, HdfsUri location) {
        Preconditions.checkNotNull(tableName);
        StringBuilder sb = new StringBuilder("CREATE ");
        if (isExternal)
            sb.append("EXTERNAL ");
        sb.append("TABLE ");
        if (ifNotExists)
            sb.append("IF NOT EXISTS ");
        if (dbName != null)
            sb.append(dbName + ".");
        sb.append(tableName);
        if (columnsSql != null) {
            sb.append(" (\n  ");
            sb.append(Joiner.on(",\n  ").join(columnsSql));
            if (!primaryKeysSql.isEmpty()) {
                sb.append(",\n  PRIMARY KEY (");
                Joiner.on(", ").appendTo(sb, primaryKeysSql).append(")");
            }
            sb.append("\n)");
        }
        sb.append("\n");

        if (partitionColumnsSql != null && partitionColumnsSql.size() > 0) {
            sb.append(String.format("PARTITIONED BY (\n  %s\n)\n", Joiner.on(", \n  ").join(partitionColumnsSql)));
        }

        if (kuduPartitionByParams != null) {
            sb.append("PARTITION BY " + kuduPartitionByParams + "\n");
        }

        if (tableComment != null)
            sb.append(" COMMENT '" + tableComment + "'\n");

        if (rowFormat != null && !rowFormat.isDefault()) {
            sb.append("ROW FORMAT DELIMITED");
            if (rowFormat.getFieldDelimiter() != null) {
                String fieldDelim = StringEscapeUtils.escapeJava(rowFormat.getFieldDelimiter());
                sb.append(" FIELDS TERMINATED BY '" + fieldDelim + "'");
            }
            if (rowFormat.getEscapeChar() != null) {
                String escapeChar = StringEscapeUtils.escapeJava(rowFormat.getEscapeChar());
                sb.append(" ESCAPED BY '" + escapeChar + "'");
            }
            if (rowFormat.getLineDelimiter() != null) {
                String lineDelim = StringEscapeUtils.escapeJava(rowFormat.getLineDelimiter());
                sb.append(" LINES TERMINATED BY '" + lineDelim + "'");
            }
            sb.append("\n");
        }

        if (storageHandlerClass == null) {
            // TODO: Remove this special case when we have the LZO_TEXT writer
            // We must handle LZO_TEXT specially because Impala does not yet support creating
            // tables with this row format. In this case, we cannot output "WITH
            // SERDEPROPERTIES" because Hive does not support it with "STORED AS". For any
            // other HdfsFileFormat we want to output the serdeproperties because it is
            // supported by Impala.
            if (compression != HdfsCompression.LZO && compression != HdfsCompression.LZO_INDEX
                    && serdeParameters != null && !serdeParameters.isEmpty()) {
                sb.append("WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n");
            }

            if (fileFormat != null) {
                sb.append("STORED AS " + fileFormat.toSql(compression) + "\n");
            }
        } else {
            // If the storageHandlerClass is set, then we will generate the proper Hive DDL
            // because we do not yet support creating HBase tables via Impala.
            sb.append("STORED BY '" + storageHandlerClass + "'\n");
            if (serdeParameters != null && !serdeParameters.isEmpty()) {
                sb.append("WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n");
            }
        }
        if (location != null) {
            sb.append("LOCATION '" + location.toString() + "'\n");
        }
        if (tblProperties != null && !tblProperties.isEmpty()) {
            sb.append("TBLPROPERTIES " + propertyMapToSql(tblProperties));
        }
        return sb.toString();
    }

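    /**
     * Returns the "CREATE FUNCTION" SQL for the given functions, concatenated
     * into a single string.
     */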
    public static String getCreateFunctionSql(List<Function> functions) {
        Preconditions.checkNotNull(functions);
        StringBuilder sb = new StringBuilder();
        for (Function fn : functions) {
            sb.append(fn.toSql(false));
        }
        return sb.toString();
    }

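    /**
     * Returns a "CREATE VIEW" statement that creates the given view.
     */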
    public static String getCreateViewSql(View view) {
        StringBuilder sb = new StringBuilder();
        sb.append("CREATE VIEW ");
        // Use toSql() to ensure that the table name and query statement are normalized
        // and identifiers are quoted.
        sb.append(view.getTableName().toSql());
        sb.append(" AS\n");
        sb.append(view.getQueryStmt().toSql());
        return sb.toString();
    }

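    /**
     * Returns the column definition SQL for the given column: name and type,
     * the Kudu-specific attributes (nullability, encoding, compression, default
     * value, block size) where applicable, and the column comment if set.
     */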
    private static String columnToSql(Column col) {
        StringBuilder sb = new StringBuilder(col.getName());
        if (col.getType() != null)
            sb.append(" " + col.getType().toSql());
        if (col instanceof KuduColumn) {
            KuduColumn kuduCol = (KuduColumn) col;
            Boolean isNullable = kuduCol.isNullable();
            if (isNullable != null)
                sb.append(isNullable ? " NULL" : " NOT NULL");
            if (kuduCol.getEncoding() != null)
                sb.append(" ENCODING " + kuduCol.getEncoding());
            if (kuduCol.getCompression() != null) {
                sb.append(" COMPRESSION " + kuduCol.getCompression());
            }
            if (kuduCol.getDefaultValue() != null) {
                sb.append(" DEFAULT " + kuduCol.getDefaultValue().toSql());
            }
            if (kuduCol.getBlockSize() != 0) {
                sb.append(String.format(" BLOCK_SIZE %d", kuduCol.getBlockSize()));
            }
        }
        if (!Strings.isNullOrEmpty(col.getComment())) {
            sb.append(String.format(" COMMENT '%s'", col.getComment()));
        }
        return sb.toString();
    }

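    /**
     * Returns the given property map as SQL of the form
     * ('key1'='value1', 'key2'='value2', ...), with each value Java-escaped.
     */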
    private static String propertyMapToSql(Map<String, String> propertyMap) {
        List<String> properties = Lists.newArrayList();
        for (Map.Entry<String, String> entry : propertyMap.entrySet()) {
            properties.add(String.format("'%s'='%s'", entry.getKey(),
                    // Properties may contain characters that need to be escaped.
                    // e.g. If the row format escape delimiter is '\', the map of serde properties
                    // from the metastore table will contain 'escape.delim' => '\', which is not
                    // properly escaped.
                    StringEscapeUtils.escapeJava(entry.getValue())));
        }
        return "(" + Joiner.on(", ").join(properties) + ")";
    }

    /**
     * Returns a SQL representation of the given list of hints. Uses the end-of-line
     * commented plan hint style such that hinted views created by Impala are readable by
     * Hive (parsed as a comment by Hive).
     */
    public static String getPlanHintsSql(List<PlanHint> hints) {
        Preconditions.checkNotNull(hints);
        if (hints.isEmpty())
            return "";
        StringBuilder sb = new StringBuilder();
        sb.append("\n-- +");
        sb.append(Joiner.on(",").join(hints));
        sb.append("\n");
        return sb.toString();
    }
}
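
Example

Below is a minimal, hypothetical driver illustrating the identifier-quoting behavior of getIdentSql() and getPathSql(). The class name ToSqlUtilsDemo and the inputs are invented for illustration; the expected outputs shown in the comments follow from the lexing rules described in the Javadoc above, assuming the Impala frontend and its Hive/ANTLR dependencies are on the classpath.

import java.util.Arrays;

import org.apache.impala.analysis.ToSqlUtils;

// Hypothetical demo class, not part of the Impala source tree.
public class ToSqlUtilsDemo {
    public static void main(String[] args) {
        // An ordinary identifier needs no quoting.
        System.out.println(ToSqlUtils.getIdentSql("my_table"));   // my_table
        // Impala/Hive keywords must be enclosed in backquotes.
        System.out.println(ToSqlUtils.getIdentSql("select"));     // `select`
        // Hive cannot lex its own auto-generated column names ("_c0", "_c1",
        // ...) unless they are quoted.
        System.out.println(ToSqlUtils.getIdentSql("_c0"));        // `_c0`
        // Identifiers starting with a digit are quoted so that a preceding "."
        // cannot turn them into a decimal literal (e.g. "db.123_tbl").
        System.out.println(ToSqlUtils.getIdentSql("123_tbl"));    // `123_tbl`
        // Path components are quoted individually and joined with ".".
        System.out.println(ToSqlUtils.getPathSql(
                Arrays.asList("db", "123_tbl")));                 // db.`123_tbl`
    }
}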