org.apache.hadoop.hive.ql.metadata.formatting.MetaDataPrettyFormatUtils.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.metadata.formatting.MetaDataPrettyFormatUtils.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.metadata.formatting;

import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

/**
 * This class provides methods to format the output of DESCRIBE PRETTY
 * in a human-readable way.
 */
public final class MetaDataPrettyFormatUtils {

    public static final int PRETTY_MAX_INTERCOL_SPACING = 4;
    private static final int PRETTY_ALIGNMENT = 10;
    /**
     * Minimum length of the comment column. This is relevant only when the terminal width
     * or hive.cli.pretty.output.num.cols is too small, or when there are very large column
     * names.
     * 10 was arbitrarily chosen.
     */
    private static final int MIN_COMMENT_COLUMN_LEN = 10;
    /**
     * Value of prettyOutputNumCols that means "no width was specified".
     */
    private static final int UNSPECIFIED_NUM_COLS = -1;
    /**
     * Width used when no width was specified. Fixed to 80 (minus one) to remove
     * the jline dependency.
     */
    private static final int DEFAULT_NUM_COLS = 80 - 1;
    /**
     * Regex used to split a comment into lines. The two-character CRLF
     * alternative must come first: regex alternation takes the first branch
     * that matches, so listing a lone CR first would split every CRLF twice
     * and produce a spurious empty line.
     */
    private static final String LINE_BREAK_REGEX = "\r\n|\r|\n";

    private MetaDataPrettyFormatUtils() {
    }

    /**
     * Formats the given columns (and, when present, the partition columns) as
     * an aligned, multi-line description.
     *
     * @param cols The columns to describe. The width of the name column is
     * derived from the longest name in this list.
     * @param partCols The partition columns; may be null or empty, in which
     * case no "# Partition Information" section is emitted.
     * @param prettyOutputNumCols The pretty output is formatted to fit within
     * these many columns.
     * @return The formatted description.
     */
    public static String getAllColumnsInformation(List<FieldSchema> cols, List<FieldSchema> partCols,
            int prettyOutputNumCols) {
        StringBuilder columnInformation = new StringBuilder(MetaDataFormatUtils.DEFAULT_STRINGBUILDER_SIZE);
        int maxColNameLen = findMaxColumnNameLen(cols);
        formatColumnsHeaderPretty(columnInformation, maxColNameLen, prettyOutputNumCols);
        formatAllFieldsPretty(columnInformation, cols, maxColNameLen, prettyOutputNumCols);

        if ((partCols != null) && (!partCols.isEmpty())) {
            columnInformation.append(MetaDataFormatUtils.LINE_DELIM).append("# Partition Information")
                    .append(MetaDataFormatUtils.LINE_DELIM);
            formatColumnsHeaderPretty(columnInformation, maxColNameLen, prettyOutputNumCols);
            formatAllFieldsPretty(columnInformation, partCols, maxColNameLen, prettyOutputNumCols);
        }

        return columnInformation.toString();
    }

    /**
     * Find the length of the largest column name.
     *
     * @return The longest name length, or 0 when the list is empty.
     */
    private static int findMaxColumnNameLen(List<FieldSchema> cols) {
        // Start at 0 rather than -1: the caller uses (result + 1) as a
        // String.format width, and a width of 0 is rejected by the formatter.
        int maxLen = 0;
        for (FieldSchema col : cols) {
            maxLen = Math.max(maxLen, col.getName().length());
        }
        return maxLen;
    }

    /**
     * Appends the header row followed by a separator line made of two field
     * delimiters.
     *
     * @param maxColNameLen The length of the largest column name
     */
    private static void formatColumnsHeaderPretty(StringBuilder columnInformation, int maxColNameLen,
            int prettyOutputNumCols) {
        String[] columnHeaders = MetaDataFormatUtils.getColumnsHeader(null);
        formatOutputPretty(columnHeaders[0], columnHeaders[1], columnHeaders[2], columnInformation, maxColNameLen,
                prettyOutputNumCols);
        columnInformation.append(MetaDataFormatUtils.FIELD_DELIM).append(MetaDataFormatUtils.FIELD_DELIM)
                .append(MetaDataFormatUtils.LINE_DELIM);
    }

    /**
     * Appends one formatted row (name, type, comment) per column.
     */
    private static void formatAllFieldsPretty(StringBuilder tableInfo, List<FieldSchema> cols, int maxColNameLen,
            int prettyOutputNumCols) {
        for (FieldSchema col : cols) {
            formatOutputPretty(col.getName(), col.getType(), MetaDataFormatUtils.getComment(col), tableInfo,
                    maxColNameLen, prettyOutputNumCols);
        }
    }

    /**
     * If the specified comment is too long, add line breaks at appropriate
     * locations.  Note that the comment may already include line-breaks
     * specified by the user at table creation time.
     * @param comment The comment text; must not be null.
     * @param columnsAlreadyConsumed The number of columns on the current line
     * that have already been consumed by the column name, column type and
     * the surrounding delimiters.
     * @param prettyOutputNumCols The total width to fit within, or -1 to use
     * the built-in default width.
     * @return The comment with line breaks added at appropriate locations.
     */
    private static String breakCommentIntoMultipleLines(String comment, int columnsAlreadyConsumed,
            int prettyOutputNumCols) {

        if (prettyOutputNumCols == UNSPECIFIED_NUM_COLS) {
            prettyOutputNumCols = DEFAULT_NUM_COLS;
        }

        // Never let the comment column shrink below the minimum, even if that
        // means overflowing the requested width.
        int commentNumCols = Math.max(prettyOutputNumCols - columnsAlreadyConsumed, MIN_COMMENT_COLUMN_LEN);

        // Track the number of columns allocated for the comment that have
        // already been consumed on the current line.
        int commentNumColsConsumed = 0;

        // Delimiters are returned as tokens too (third argument), so that the
        // original whitespace and line breaks are preserved in the output.
        StringTokenizer st = new StringTokenizer(comment, " \t\n\r\f", true);
        // We use a StringTokenizer instead of a BreakIterator, because
        // table comments often contain text that looks like code. For eg:
        // 'Type0' => 0, // This is Type 0
        // 'Type1' => 1, // This is Type 1
        // BreakIterator is meant for regular text, and was found to give
        // bad line breaks when we tried it out.

        StringBuilder commentBuilder = new StringBuilder(comment.length());
        while (st.hasMoreTokens()) {
            String currWord = st.nextToken();
            if (currWord.equals("\n") || currWord.equals("\r") || currWord.equals("\f")) {
                // A user-supplied break: keep it and restart the column count.
                commentBuilder.append(currWord);
                commentNumColsConsumed = 0;
                continue;
            }
            if (commentNumColsConsumed + currWord.length() > commentNumCols) {
                // currWord won't fit on the current line
                if (currWord.length() > commentNumCols) {
                    // currWord is too long to fit on a line even all by itself.
                    // Hence we have no option but to split it.  The first chunk
                    // will go to the end of the current line.  Subsequent chunks
                    // will be of length commentNumCols.  The last chunk
                    // may be smaller.
                    while (currWord.length() > commentNumCols) {
                        // commentNumColsConsumed never exceeds commentNumCols,
                        // so this is never negative.
                        int remainingLineLen = commentNumCols - commentNumColsConsumed;
                        String wordChunk = currWord.substring(0, remainingLineLen);
                        commentBuilder.append(wordChunk);
                        commentBuilder.append(MetaDataFormatUtils.LINE_DELIM);
                        commentNumColsConsumed = 0;
                        currWord = currWord.substring(remainingLineLen);
                    }
                    // Handle the last chunk
                    if (currWord.length() > 0) {
                        commentBuilder.append(currWord);
                        commentNumColsConsumed = currWord.length();
                    }
                } else {
                    // Start on a new line
                    commentBuilder.append(MetaDataFormatUtils.LINE_DELIM);
                    if (!currWord.equals(" ")) {
                        // When starting a new line, do not start with a space.
                        commentBuilder.append(currWord);
                        commentNumColsConsumed = currWord.length();
                    } else {
                        commentNumColsConsumed = 0;
                    }
                }
            } else {
                commentBuilder.append(currWord);
                commentNumColsConsumed += currWord.length();
            }
        }
        return commentBuilder.toString();
    }

    /**
     * Appends the specified text with alignment to sb.
     * Also appends an appropriately sized delimiter, followed by the field
     * delimiter.
     * @param alignment The minimum width the text is left-aligned in; must be
     * at least 1.
     * @return The number of columns consumed by the aligned string and the
     * space delimiter (the trailing field delimiter is not counted).
     */
    private static int appendFormattedColumn(StringBuilder sb, String text, int alignment) {
        String paddedText = String.format("%-" + alignment + "s", text);
        // Text that overflows its alignment eats into the inter-column
        // spacing; once it overflows by the whole spacing or more, the full
        // spacing is used again.
        int fullWidth = alignment + PRETTY_MAX_INTERCOL_SPACING;
        int delimCount = (paddedText.length() < fullWidth)
                ? fullWidth - paddedText.length()
                : PRETTY_MAX_INTERCOL_SPACING;
        String delim = StringUtils.repeat(" ", delimCount);
        sb.append(paddedText);
        sb.append(delim);
        sb.append(MetaDataFormatUtils.FIELD_DELIM);

        return paddedText.length() + delim.length();
    }

    /**
     * Appends one row: the aligned name, the aligned type, and the comment
     * wrapped to the remaining width. A null comment is treated as empty.
     *
     * @param maxColNameLength The length of the largest column name; the name
     * column is aligned to one more than this.
     */
    private static void formatOutputPretty(String colName, String colType, String colComment,
            StringBuilder tableInfo, int maxColNameLength, int prettyOutputNumCols) {
        int colsNameConsumed = appendFormattedColumn(tableInfo, colName, maxColNameLength + 1);
        int colsTypeConsumed = appendFormattedColumn(tableInfo, colType, PRETTY_ALIGNMENT);

        String wrappedComment = (colComment == null) ? ""
                : breakCommentIntoMultipleLines(colComment, colsNameConsumed + colsTypeConsumed,
                        prettyOutputNumCols);

        /* Comment indent processing for multi-line comments.
         * Every line of a comment should start in the same column. The first
         * line follows the name and type on the current row. Each following
         * line starts a new row, so it is prefixed with colsNameConsumed
         * spaces standing in for the name column and colsTypeConsumed spaces
         * standing in for the type column, each followed by a FIELD_DELIM.
         */
        String[] commentSegments = wrappedComment.split(LINE_BREAK_REGEX);
        // split() drops trailing empty strings, so a comment made only of
        // line breaks yields a zero-length array.
        String firstSegment = (commentSegments.length > 0) ? commentSegments[0] : "";
        tableInfo.append(trimTrailingWS(firstSegment));
        tableInfo.append(MetaDataFormatUtils.LINE_DELIM);

        if (commentSegments.length > 1) {
            String continuationIndent = StringUtils.repeat(" ", colsNameConsumed)
                    + MetaDataFormatUtils.FIELD_DELIM
                    + StringUtils.repeat(" ", colsTypeConsumed)
                    + MetaDataFormatUtils.FIELD_DELIM;
            for (int i = 1; i < commentSegments.length; i++) {
                tableInfo.append(continuationIndent).append(commentSegments[i]);
                tableInfo.append(MetaDataFormatUtils.LINE_DELIM);
            }
        }
    }

    /**
     * Returns str with any trailing whitespace removed.
     */
    private static String trimTrailingWS(String str) {
        return str.replaceAll("\\s+$", "");
    }
}