Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lens.cube.parse; import static org.apache.hadoop.hive.ql.parse.HiveParser.*; import java.util.HashSet; import java.util.Set; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.cube.parse.HQLParser.ASTNodeVisitor; import org.apache.lens.cube.parse.HQLParser.TreeNode; import org.apache.lens.server.api.error.LensException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import com.google.common.base.Optional; class ColumnResolver implements ContextRewriter { @Override public void rewriteContext(CubeQueryContext cubeql) throws LensException { checkForAllColumnsSelected(cubeql); extractColumns(cubeql); } private void checkForAllColumnsSelected(CubeQueryContext cubeql) throws LensException { // Check if its 'select * from...' ASTNode selTree = cubeql.getSelectAST(); if (selTree.getChildCount() == 1) { ASTNode star = HQLParser.findNodeByPath(selTree, TOK_SELEXPR, TOK_ALLCOLREF); if (star == null) { star = HQLParser.findNodeByPath(selTree, TOK_SELEXPR, TOK_FUNCTIONSTAR); } if (star != null) { int starType = star.getToken().getType(); if (TOK_FUNCTIONSTAR == starType || TOK_ALLCOLREF == starType) { throw new LensException(LensCubeErrorCode.ALL_COLUMNS_NOT_SUPPORTED.getLensErrorInfo()); } } } } private void extractColumns(CubeQueryContext cubeql) throws LensException { getColsForSelectTree(cubeql); getColsForWhereTree(cubeql); getColsForAST(cubeql, cubeql.getJoinAST()); getColsForAST(cubeql, cubeql.getGroupByAST()); getColsForHavingAST(cubeql, cubeql.getHavingAST()); getColsForAST(cubeql, cubeql.getOrderByAST()); // Update join dimension tables for (String table : cubeql.getTblAliasToColumns().keySet()) { if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table)) { if (!cubeql.addQueriedTable(table)) { throw new LensException(LensCubeErrorCode.NEITHER_CUBE_NOR_DIMENSION.getLensErrorInfo()); } } } } private void getColsForAST(CubeQueryContext cubeql, ASTNode clause) throws LensException { if (clause == null) { return; } for (int i = 0; i < clause.getChildCount(); i++) { ASTNode queriedExpr = (ASTNode) clause.getChild(i); QueriedPhraseContext qur = new QueriedPhraseContext(queriedExpr); getColsForTree(cubeql, queriedExpr, qur, true); cubeql.addColumnsQueried(qur.getTblAliasToColumns()); cubeql.addQueriedPhrase(qur); } } private void getColsForHavingAST(CubeQueryContext cubeql, ASTNode clause) throws LensException { if (clause == null) { return; } // split having clause phrases to be column level sothat having clause can be pushed to multiple facts if required. if (HQLParser.isAggregateAST(clause) || clause.getType() == HiveParser.TOK_TABLE_OR_COL || clause.getType() == HiveParser.DOT || clause.getChildCount() == 0) { QueriedPhraseContext qur = new QueriedPhraseContext(clause); qur.setAggregate(true); getColsForTree(cubeql, clause, qur, true); cubeql.addColumnsQueried(qur.getTblAliasToColumns()); cubeql.addQueriedPhrase(qur); } else { for (Node child : clause.getChildren()) { getColsForHavingAST(cubeql, (ASTNode) child); } } } // finds columns in AST passed. static void getColsForTree(final CubeQueryContext cubeql, ASTNode tree, final TrackQueriedColumns tqc, final boolean skipAliases) throws LensException { if (tree == null) { return; } // Traverse the tree to get column names // We are doing a complete traversal so that expressions of columns // are also captured ex: f(cola + colb/tab1.colc) HQLParser.bft(tree, new ASTNodeVisitor() { @Override public void visit(TreeNode visited) { ASTNode node = visited.getNode(); ASTNode parent = null; if (visited.getParent() != null) { parent = visited.getParent().getNode(); } if (node.getToken().getType() == TOK_TABLE_OR_COL && (parent == null || parent.getToken().getType() != DOT)) { // Take child ident.totext ASTNode ident = (ASTNode) node.getChild(0); String column = ident.getText().toLowerCase(); if (skipAliases && cubeql.isColumnAnAlias(column)) { // column is an existing alias return; } tqc.addColumnsQueried(CubeQueryContext.DEFAULT_TABLE, column); } else if (node.getToken().getType() == DOT) { // This is for the case where column name is prefixed by table name // or table alias // For example 'select fact.id, dim2.id ...' // Right child is the column name, left child.ident is table name ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier); ASTNode colIdent = (ASTNode) node.getChild(1); String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); tqc.addColumnsQueried(table, column); } } }); } // find columns in where tree // if where expression is timerange function, then time range columns are // added // only if timerange clause shouldn't be replaced with its corresponding // partition column private void getColsForWhereTree(final CubeQueryContext cubeql) throws LensException { if (cubeql.getWhereAST() == null) { return; } for (int i = 0; i < cubeql.getWhereAST().getChildCount(); i++) { ASTNode queriedExpr = (ASTNode) cubeql.getWhereAST().getChild(i); QueriedPhraseContext qur = new QueriedPhraseContext(queriedExpr); addColumnsForWhere(cubeql, qur, queriedExpr, cubeql.getWhereAST()); cubeql.addColumnsQueried(qur.getTblAliasToColumns()); cubeql.addQueriedPhrase(qur); } } // Find all columns of select tree. // Finds columns in each select expression. // // Updates alias for each selected expression. // Alias is updated as follows: // Case 1: If select expression does not have an alias // ** And the expression is the column queried, the column name is put as select alias. // ** If the expression is not just simple column queried, the alias is // constructed as 'expr' + index of the expression. // Case 2: If select expression already has alias // ** Adds it to exprToAlias map // ** and the alias is constructed as 'expr' + index of the expression. // and user given alias is the final alias of the expression. private static final String SELECT_ALIAS_PREFIX = "expr"; private void getColsForSelectTree(final CubeQueryContext cubeql) throws LensException { int exprInd = 1; for (int i = 0; i < cubeql.getSelectAST().getChildCount(); i++) { ASTNode selectExpr = (ASTNode) cubeql.getSelectAST().getChild(i); ASTNode selectExprChild = (ASTNode) selectExpr.getChild(0); Set<String> cols = new HashSet<>(); SelectPhraseContext sel = new SelectPhraseContext(selectExpr); addColumnsForSelectExpr(sel, selectExpr, cubeql.getSelectAST(), cols); String alias = selectExpr.getChildCount() > 1 ? selectExpr.getChild(1).getText() : null; String selectAlias; String selectFinalAlias = null; if (alias != null) { selectFinalAlias = alias; selectAlias = SELECT_ALIAS_PREFIX + exprInd; } else if (cols.size() == 1 && (selectExprChild.getToken().getType() == TOK_TABLE_OR_COL || selectExprChild.getToken().getType() == DOT)) { // select expression is same as the column selectAlias = cols.iterator().next().toLowerCase(); } else { selectAlias = SELECT_ALIAS_PREFIX + exprInd; selectFinalAlias = HQLParser.getString(selectExprChild); } exprInd++; cubeql.addColumnsQueried(sel.getTblAliasToColumns()); sel.setSelectAlias(selectAlias); sel.setFinalAlias(!StringUtils.isBlank(selectFinalAlias) ? "`" + selectFinalAlias + "`" : selectAlias); sel.setActualAlias(alias != null ? alias.toLowerCase() : null); cubeql.addSelectPhrase(sel); } } private static void addColumnsForWhere(final CubeQueryContext cubeql, QueriedPhraseContext qur, ASTNode node, ASTNode parent) { if (node.getToken().getType() == TOK_TABLE_OR_COL && (parent != null && parent.getToken().getType() != DOT)) { // Take child ident.totext ASTNode ident = (ASTNode) node.getChild(0); String column = ident.getText().toLowerCase(); if (cubeql.isColumnAnAlias(column)) { // column is an existing alias return; } addColumnQueriedWithTimeRangeFuncCheck(cubeql, qur, parent, CubeQueryContext.DEFAULT_TABLE, column); } else if (node.getToken().getType() == DOT) { // This is for the case where column name is prefixed by table name // or table alias // For example 'select fact.id, dim2.id ...' // Right child is the column name, left child.ident is table name ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier); ASTNode colIdent = (ASTNode) node.getChild(1); String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); addColumnQueriedWithTimeRangeFuncCheck(cubeql, qur, parent, table, column); } else if (node.getToken().getType() == TOK_FUNCTION) { ASTNode fname = HQLParser.findNodeByPath(node, Identifier); if (fname != null && CubeQueryContext.TIME_RANGE_FUNC.equalsIgnoreCase(fname.getText())) { addColumnsForWhere(cubeql, qur, (ASTNode) node.getChild(1), node); } else { for (int i = 0; i < node.getChildCount(); i++) { addColumnsForWhere(cubeql, qur, (ASTNode) node.getChild(i), node); } } } else { for (int i = 0; i < node.getChildCount(); i++) { addColumnsForWhere(cubeql, qur, (ASTNode) node.getChild(i), node); } } } private static void addColumnQueriedWithTimeRangeFuncCheck(final CubeQueryContext cubeql, QueriedPhraseContext qur, final ASTNode parent, final String table, final String column) { if (isTimeRangeFunc(parent)) { cubeql.addQueriedTimeDimensionCols(column); cubeql.addColumnsQueriedWithTimeDimCheck(qur, CubeQueryContext.DEFAULT_TABLE, column); } else { qur.addColumnsQueried(table, column); } } private static boolean isTimeRangeFunc(final ASTNode node) { Optional<String> funcNameOp = getNameIfFunc(node); final String funcName = funcNameOp.isPresent() ? funcNameOp.get() : null; return CubeQueryContext.TIME_RANGE_FUNC.equalsIgnoreCase(funcName); } private static Optional<String> getNameIfFunc(final ASTNode node) { String funcName = null; if (node.getToken().getType() == TOK_FUNCTION) { ASTNode foundNode = HQLParser.findNodeByPath(node, Identifier); if (foundNode != null) { funcName = foundNode.getText(); } } return Optional.fromNullable(funcName); } static void addColumnsForSelectExpr(final TrackQueriedColumns sel, ASTNode node, ASTNode parent, Set<String> cols) { if (node.getToken().getType() == TOK_TABLE_OR_COL && (parent != null && parent.getToken().getType() != DOT)) { // Take child ident.totext ASTNode ident = (ASTNode) node.getChild(0); String column = ident.getText().toLowerCase(); sel.addColumnsQueried(CubeQueryContext.DEFAULT_TABLE, column); cols.add(column); } else if (node.getToken().getType() == DOT) { // This is for the case where column name is prefixed by table name // or table alias // For example 'select fact.id, dim2.id ...' // Right child is the column name, left child.ident is table name ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier); ASTNode colIdent = (ASTNode) node.getChild(1); String column = colIdent.getText().toLowerCase(); String table = tabident.getText().toLowerCase(); sel.addColumnsQueried(table, column); cols.add(column); } else { for (int i = 0; i < node.getChildCount(); i++) { addColumnsForSelectExpr(sel, (ASTNode) node.getChild(i), node, cols); } } } }