com.cloudera.impala.analysis.ToSqlUtils.java Source code

Introduction

Here is the source code for com.cloudera.impala.analysis.ToSqlUtils.java, a utility class for building SQL strings (for example, CREATE TABLE statements and quoted identifiers) that both Impala and Hive can parse.

Source

// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.analysis;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.parse.HiveLexer;

import com.cloudera.impala.catalog.CatalogException;
import com.cloudera.impala.catalog.Column;
import com.cloudera.impala.catalog.HBaseTable;
import com.cloudera.impala.catalog.HdfsCompression;
import com.cloudera.impala.catalog.HdfsFileFormat;
import com.cloudera.impala.catalog.RowFormat;
import com.cloudera.impala.catalog.Table;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Contains utility methods for creating SQL strings, for example,
 * for creating identifier strings that are compatible with Hive or Impala.
 */
public class ToSqlUtils {
    // Table properties to hide when generating the toSql() statement
    // EXTERNAL and comment are hidden because they are part of the toSql result, e.g.,
    // "CREATE EXTERNAL TABLE <name> ... COMMENT <comment> ..."
    private static final ImmutableSet<String> HIDDEN_TABLE_PROPERTIES = ImmutableSet.of("EXTERNAL", "comment");

    /**
     * Given an unquoted identifier string, returns an identifier lexable by
     * Impala and Hive, possibly by enclosing the original identifier in "`" quotes.
     * For example, Hive cannot parse its own auto-generated column
     * names "_c0", "_c1" etc. unless they are quoted. Impala and Hive keywords
     * must also be quoted.
     *
 * Impala's lexer recognizes a superset of the unquoted identifiers that Hive's lexer can.
     * At the same time, Impala's and Hive's list of keywords differ.
     * This method always returns an identifier that Impala and Hive can recognize,
     * although for some identifiers the quotes may not be strictly necessary for
     * one or the other system.
     */
    public static String getIdentSql(String ident) {
        boolean hiveNeedsQuotes = true;
        HiveLexer hiveLexer = new HiveLexer(new ANTLRStringStream(ident));
        try {
            Token t = hiveLexer.nextToken();
            // Check that the lexer recognizes an identifier and then EOF.
            boolean identFound = t.getType() == HiveLexer.Identifier;
            t = hiveLexer.nextToken();
            // No enclosing quotes are necessary for Hive.
            hiveNeedsQuotes = !(identFound && t.getType() == HiveLexer.EOF);
        } catch (Exception e) {
            // Ignore exception and just quote the identifier to be safe.
        }
        boolean isImpalaKeyword = SqlScanner.isKeyword(ident.toUpperCase());
        if (hiveNeedsQuotes || isImpalaKeyword)
            return "`" + ident + "`";
        return ident;
    }
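    // Illustrative behavior (examples added for this tutorial, not part of the
    // original source), following the contract described in the Javadoc above:
    //   getIdentSql("_c0")    -> "`_c0`"    (Hive cannot lex "_c0" unquoted)
    //   getIdentSql("select") -> "`select`" (keyword in both systems)
    //   getIdentSql("col1")   -> "col1"     (no quoting needed)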

    /**
     * Returns the "CREATE TABLE" SQL string corresponding to the given
     * CreateTableStmt statement.
     */
    public static String getCreateTableSql(CreateTableStmt stmt) {
        ArrayList<String> colsSql = Lists.newArrayList();
        for (ColumnDesc col : stmt.getColumnDefs()) {
            colsSql.add(col.toString());
        }
        ArrayList<String> partitionColsSql = Lists.newArrayList();
        for (ColumnDesc col : stmt.getPartitionColumnDefs()) {
            partitionColsSql.add(col.toString());
        }
        // TODO: Pass the correct compression, if applicable.
        return getCreateTableSql(stmt.getDb(), stmt.getTbl(), stmt.getComment(), colsSql, partitionColsSql,
                stmt.getTblProperties(), stmt.getSerdeProperties(), stmt.isExternal(), stmt.getIfNotExists(),
                stmt.getRowFormat(), HdfsFileFormat.fromThrift(stmt.getFileFormat()), HdfsCompression.NONE, null,
                // LOCATION is optional in CREATE TABLE; guard against a null location.
                stmt.getLocation() == null ? null : stmt.getLocation().toString());
    }

    /**
     * Returns a "CREATE TABLE" statement that creates the specified table.
     */
    public static String getCreateTableSql(Table table) throws CatalogException {
        Preconditions.checkNotNull(table);
        org.apache.hadoop.hive.metastore.api.Table msTable = table.getMetaStoreTable();
        HashMap<String, String> properties = Maps.newHashMap(msTable.getParameters());
        boolean isExternal = msTable.getTableType() != null
                && msTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString());
        String comment = properties.get("comment");
        for (String hiddenProperty : HIDDEN_TABLE_PROPERTIES) {
            properties.remove(hiddenProperty);
        }
        ArrayList<String> colsSql = Lists.newArrayList();
        ArrayList<String> partitionColsSql = Lists.newArrayList();
        boolean isHbaseTable = table instanceof HBaseTable;
        for (int i = 0; i < table.getColumns().size(); i++) {
            if (!isHbaseTable && i < table.getNumClusteringCols()) {
                partitionColsSql.add(columnToSql(table.getColumns().get(i)));
            } else {
                colsSql.add(columnToSql(table.getColumns().get(i)));
            }
        }
        RowFormat rowFormat = RowFormat.fromStorageDescriptor(msTable.getSd());
        HdfsFileFormat format = HdfsFileFormat.fromHdfsInputFormatClass(msTable.getSd().getInputFormat());
        HdfsCompression compression = HdfsCompression.fromHdfsInputFormatClass(msTable.getSd().getInputFormat());
        String location = isHbaseTable ? null : msTable.getSd().getLocation();
        Map<String, String> serdeParameters = msTable.getSd().getSerdeInfo().getParameters();
        return getCreateTableSql(table.getDb().getName(), table.getName(), comment, colsSql, partitionColsSql,
                properties, serdeParameters, isExternal, false, rowFormat, format, compression,
                table.getStorageHandlerClassName(), location);
    }
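    // Illustrative split (not part of the original source): for an HDFS table whose
    // columns are (year, month, id, val) with getNumClusteringCols() == 2, the loop
    // above routes "year" and "month" into PARTITIONED BY (...) and the remaining
    // columns into the schema list; HBase tables have no partition columns, so for
    // them every column goes into the schema list.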

    /**
     * Returns a "CREATE TABLE" string that creates the table with the specified properties.
     * The tableName must not be null. If columnsSql is null, the schema syntax will
     * not be generated.
     */
    public static String getCreateTableSql(String dbName, String tableName, String tableComment,
            List<String> columnsSql, List<String> partitionColumnsSql, Map<String, String> tblProperties,
            Map<String, String> serdeParameters, boolean isExternal, boolean ifNotExists, RowFormat rowFormat,
            HdfsFileFormat fileFormat, HdfsCompression compression, String storageHandlerClass, String location) {
        Preconditions.checkNotNull(tableName);
        StringBuilder sb = new StringBuilder("CREATE ");
        if (isExternal)
            sb.append("EXTERNAL ");
        sb.append("TABLE ");
        if (ifNotExists)
            sb.append("IF NOT EXISTS ");
        if (dbName != null)
            sb.append(dbName + ".");
        // The table name is always emitted; columnsSql only controls the schema syntax.
        sb.append(tableName);
        if (columnsSql != null) {
            sb.append(" (\n  ");
            sb.append(Joiner.on(", \n  ").join(columnsSql));
            sb.append("\n)");
        }
        sb.append("\n");
        if (tableComment != null)
            sb.append(" COMMENT '" + tableComment + "'\n");

        if (partitionColumnsSql != null && partitionColumnsSql.size() > 0) {
            sb.append(String.format("PARTITIONED BY (\n  %s\n)\n", Joiner.on(", \n  ").join(partitionColumnsSql)));
        }

        if (rowFormat != null && !rowFormat.isDefault()) {
            sb.append("ROW FORMAT DELIMITED");
            if (rowFormat.getFieldDelimiter() != null) {
                String fieldDelim = StringEscapeUtils.escapeJava(rowFormat.getFieldDelimiter());
                sb.append(" FIELDS TERMINATED BY '" + fieldDelim + "'");
            }
            if (rowFormat.getEscapeChar() != null) {
                String escapeChar = StringEscapeUtils.escapeJava(rowFormat.getEscapeChar());
                sb.append(" ESCAPED BY '" + escapeChar + "'");
            }
            if (rowFormat.getLineDelimiter() != null) {
                String lineDelim = StringEscapeUtils.escapeJava(rowFormat.getLineDelimiter());
                sb.append(" LINES TERMINATED BY '" + lineDelim + "'");
            }
            sb.append("\n");
        }

        if (storageHandlerClass == null) {
            // TODO: Remove this special case when we have the LZO_TEXT writer
            // We must handle LZO_TEXT specially because Impala does not yet support creating
            // tables with this row format. In this case, we cannot output "WITH
            // SERDEPROPERTIES" because Hive does not support it with "STORED AS". For any
            // other HdfsFileFormat we want to output the serdeproperties because it is
            // supported by Impala.
            if (compression != HdfsCompression.LZO && compression != HdfsCompression.LZO_INDEX
                    && serdeParameters != null && !serdeParameters.isEmpty()) {
                sb.append("WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n");
            }

            if (fileFormat != null) {
                sb.append("STORED AS " + fileFormat.toSql(compression) + "\n");
            }
        } else {
            // If the storageHandlerClass is set, then we will generate the proper Hive DDL
            // because we do not yet support creating HBase tables via Impala.
            sb.append("STORED BY '" + storageHandlerClass + "'\n");
            if (serdeParameters != null && !serdeParameters.isEmpty()) {
                sb.append("WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n");
            }
        }
        if (location != null) {
            sb.append("LOCATION '" + location + "'\n");
        }
        if (tblProperties != null && !tblProperties.isEmpty()) {
            sb.append("TBLPROPERTIES " + propertyMapToSql(tblProperties));
        }
        return sb.toString();
    }
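    // Illustrative call (not part of the original source), passing null for the
    // optional row format, properties, storage handler, and location arguments:
    //   getCreateTableSql("mydb", "t1", "demo", Lists.newArrayList("id INT", "s STRING"),
    //       null, null, null, false, false, null, HdfsFileFormat.TEXT,
    //       HdfsCompression.NONE, null, null)
    // produces roughly:
    //   CREATE TABLE mydb.t1 (
    //     id INT,
    //     s STRING
    //   )
    //    COMMENT 'demo'
    //   STORED AS TEXTFILE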

    private static String columnToSql(Column col) {
        StringBuilder sb = new StringBuilder(col.getName());
        if (col.getType() != null)
            sb.append(" " + col.getType().toSql());
        if (!Strings.isNullOrEmpty(col.getComment())) {
            sb.append(String.format(" COMMENT '%s'", col.getComment()));
        }
        return sb.toString();
    }

    private static String propertyMapToSql(Map<String, String> propertyMap) {
        List<String> properties = Lists.newArrayList();
        for (Map.Entry<String, String> entry : propertyMap.entrySet()) {
            properties.add(String.format("'%s'='%s'", entry.getKey(),
                    // Properties may contain characters that need to be escaped.
                    // e.g. If the row format escape delimiter is '\', the map of serde properties
                    // from the metastore table will contain 'escape.delim' => '\', which is not
                    // properly escaped.
                    StringEscapeUtils.escapeJava(entry.getValue())));
        }
        return "(" + Joiner.on(", ").join(properties) + ")";
    }
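    // Illustrative escaping (not part of the original source): a property map of
    //   {"escape.delim" -> "\"}   (a single backslash)
    // is rendered as
    //   ('escape.delim'='\\')
    // because escapeJava() doubles the backslash, so the emitted DDL re-parses.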

    /**
     * Returns a SQL representation of the given list of hints. Uses the end-of-line
     * commented plan hint style such that hinted views created by Impala are readable by
     * Hive (parsed as a comment by Hive).
     */
    public static String getPlanHintsSql(List<String> hints) {
        if (hints == null || hints.isEmpty())
            return "";
        StringBuilder sb = new StringBuilder();
        sb.append("\n-- +");
        sb.append(Joiner.on(",").join(hints));
        sb.append("\n");
        return sb.toString();
    }
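
    // Illustrative output (not part of the original source):
    //   getPlanHintsSql(Lists.newArrayList("broadcast")) returns "\n-- +broadcast\n",
    //   so the hint survives in Impala while Hive treats the line as a comment.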
}
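
Example

The demo class below is a minimal, hypothetical sketch (it is not part of the Impala sources) showing how the two helpers that need no catalog objects might be exercised; it assumes the class above and Guava are on the classpath.

import java.util.List;

import com.google.common.collect.Lists;

import com.cloudera.impala.analysis.ToSqlUtils;

public class ToSqlUtilsDemo {
    public static void main(String[] args) {
        // Keywords and identifiers Hive cannot lex come back quoted; plain names do not.
        System.out.println(ToSqlUtils.getIdentSql("select")); // `select`
        System.out.println(ToSqlUtils.getIdentSql("_c0"));    // `_c0`
        System.out.println(ToSqlUtils.getIdentSql("col1"));   // col1

        // Plan hints render as an end-of-line "-- +hint" comment that Hive skips.
        List<String> hints = Lists.newArrayList("broadcast");
        System.out.print(ToSqlUtils.getPlanHintsSql(hints));  // "\n-- +broadcast\n"
    }
}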