org.apache.hadoop.hive.jdbc.storagehandler.JdbcStorageHandler.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.jdbc.storagehandler.JdbcStorageHandler.java. The class maps an external Hive table onto a JDBC data source: it wires table properties into Hadoop's DBConfiguration, supplies the JDBC input/output formats and SerDe, and decomposes query filters so that supported predicates are pushed down to the database.
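
The class Javadoc in the listing names the settings the handler expects. As a quick orientation, here is a minimal, self-contained sketch of supplying them through Hadoop's Configuration API; the driver class, URL, and credentials are hypothetical placeholders, not values taken from this source.

import org.apache.hadoop.conf.Configuration;

public class JdbcStorageHandlerSettings {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // required settings (hypothetical placeholder values)
        conf.set("mapred.jdbc.driver.class", "org.h2.Driver");
        conf.set("mapred.jdbc.url", "jdbc:h2:mem:demo");
        // optional settings
        conf.set("mapred.jdbc.username", "sa");
        conf.set("mapred.jdbc.password", "");
        System.out.println("driver = " + conf.get("mapred.jdbc.driver.class"));
    }
}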

Source

/*
 * Copyright 2016 Axibase Corporation or its affiliates. All Rights Reserved.
 * Copyright 2013-2015 Qubole
 * Copyright 2013-2015 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.jdbc.storagehandler;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;

/**
 * -- required settings
 * set mapred.jdbc.driver.class=..;
 * set mapred.jdbc.url=..;
 *
 * -- optional settings
 * set mapred.jdbc.username=..;
 * set mapred.jdbc.password=..;
 *
 * @see org.apache.hadoop.mapred.lib.db.DBConfiguration
 * @see org.apache.hadoop.mapred.lib.db.DBInputFormat
 * @see org.apache.hadoop.mapred.lib.db.DBOutputFormat
 */
public class JdbcStorageHandler extends DefaultStorageHandler implements HiveStoragePredicateHandler {
    private static final Log LOG = LogFactory.getLog(JdbcStorageHandler.class);

    private Configuration conf;

    public JdbcStorageHandler() {
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    @Override
    public HiveMetaHook getMetaHook() {
        return new JDBCHook();
    }

    @Override
    public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
        configureJobProperties(tableDesc, jobProperties);
    }

    @Override
    public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
        configureJobProperties(tableDesc, jobProperties);
    }

    @Override
    public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
        configureJobProperties(tableDesc, jobProperties);
    }

    private void configureJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
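        // Copy the JDBC-related table properties into the job configuration so
        // that DBInputFormat/DBOutputFormat (see the class Javadoc references)
        // can build the SELECT and INSERT statements for the backing table.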
        if (LOG.isDebugEnabled()) {
            LOG.debug("tabelDesc: " + tableDesc);
            LOG.debug("jobProperties: " + jobProperties);
        }

        String tblName = tableDesc.getTableName();
        Properties tblProps = tableDesc.getProperties();
        String columnNames = tblProps.getProperty(Constants.LIST_COLUMNS);
        // fall back to the column metadata captured by the SerDe helper when
        // the table properties do not carry a column list
        if (columnNames == null || columnNames.isEmpty()) {
            tblProps.setProperty(Constants.LIST_COLUMN_TYPES, JdbcSerDeHelper.columnTypeNames);
            tblProps.setProperty(Constants.LIST_COLUMNS, JdbcSerDeHelper.columnNames);
            columnNames = tblProps.getProperty(Constants.LIST_COLUMNS);
        }
        // Setting both mapred and mapreduce properties
        jobProperties.put(org.apache.hadoop.mapred.lib.db.DBConfiguration.INPUT_CLASS_PROPERTY,
                DbRecordWritable.class.getName());
        jobProperties.put(org.apache.hadoop.mapred.lib.db.DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tblName);
        jobProperties.put(org.apache.hadoop.mapred.lib.db.DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tblName);
        jobProperties.put(org.apache.hadoop.mapred.lib.db.DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, columnNames);
        jobProperties.put(org.apache.hadoop.mapred.lib.db.DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, columnNames);

        jobProperties.put(org.apache.hadoop.mapreduce.lib.db.DBConfiguration.INPUT_CLASS_PROPERTY,
                DbRecordWritable.class.getName());
        jobProperties.put(org.apache.hadoop.mapreduce.lib.db.DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tblName);
        jobProperties.put(org.apache.hadoop.mapreduce.lib.db.DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tblName);
        jobProperties.put(org.apache.hadoop.mapreduce.lib.db.DBConfiguration.INPUT_FIELD_NAMES_PROPERTY,
                columnNames);
        jobProperties.put(org.apache.hadoop.mapreduce.lib.db.DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY,
                columnNames);

        // Mirror table-level JDBC settings into both configuration namespaces,
        // anchoring the replacement to the key prefix so that any later
        // occurrence of "mapred" inside a key is left untouched.
        for (String key : tblProps.stringPropertyNames()) {
            if (key.startsWith("mapred.jdbc.")) {
                String value = tblProps.getProperty(key);
                jobProperties.put(key, value);
                jobProperties.put(key.replaceFirst("^mapred\\.", "mapreduce."), value);
            }
        }

        for (String key : tblProps.stringPropertyNames()) {
            if (key.startsWith("mapreduce.jdbc.")) {
                String value = tblProps.getProperty(key);
                jobProperties.put(key, value);
                jobProperties.put(key.replaceFirst("^mapreduce\\.", "mapred."), value);
            }
        }
    }

    @Override
    public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException {
        return new DefaultHiveAuthorizationProvider();
    }

    /**
     * @see org.apache.hadoop.hive.ql.exec.FetchOperator#getInputFormatFromCache
     */
    @SuppressWarnings("rawtypes")
    @Override
    public Class<? extends InputFormat> getInputFormatClass() {
        return JdbcInputFormat.class;
    }

    @SuppressWarnings("rawtypes")
    @Override
    public Class<? extends HiveOutputFormat> getOutputFormatClass() {
        // NOTE: must return a subclass of HiveOutputFormat
        return JdbcOutputFormat.class;
    }

    @Override
    public Class<? extends SerDe> getSerDeClass() {
        return JdbcSerDe.class;
    }

    /**
     * @see DBConfiguration#INPUT_CONDITIONS_PROPERTY
     */
    @Override
    public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer,
            ExprNodeDesc predicate) {
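        // Split the incoming filter: conditions built from the operators and
        // columns registered below become the pushed predicate (evaluated at
        // the JDBC source, see the DBConfiguration#INPUT_CONDITIONS_PROPERTY
        // reference above); everything else stays in the residual predicate
        // for Hive to evaluate after the scan.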

        IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
        // register the comparison and logical operators eligible for pushdown
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual");
        analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd");

        // getting all column names
        String columnNames = jobConf.get(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS);

        StringTokenizer st = new StringTokenizer(columnNames, ",");
        // allow every table column to appear in pushed-down conditions
        while (st.hasMoreTokens()) {
            String columnName = st.nextToken();
            analyzer.allowColumnName(columnName);
            LOG.info(columnName);
        }
        // filtering out residual and pushed predicate
        List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
        ExprNodeDesc residualPredicate = analyzer.analyzePredicate(predicate, searchConditions);

        for (IndexSearchCondition condition : searchConditions) {
            LOG.info("Condition fetched: " + condition);
        }

        DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
        decomposedPredicate.pushedPredicate = (ExprNodeGenericFuncDesc) analyzer
                .translateSearchConditions(searchConditions);
        decomposedPredicate.residualPredicate = (ExprNodeGenericFuncDesc) residualPredicate;
        if (decomposedPredicate.pushedPredicate != null) {
            LOG.info("Predicates pushed: " + decomposedPredicate.pushedPredicate.getExprString());
        }
        if (decomposedPredicate.residualPredicate != null) {
            LOG.info("Predicates not pushed: " + decomposedPredicate.residualPredicate.getExprString());
        }
        return decomposedPredicate;
    }

    private static class JDBCHook implements HiveMetaHook {

        @Override
        public void preCreateTable(Table tbl) throws MetaException {
            // the handler never owns table data, so only external tables are allowed
            if (!MetaStoreUtils.isExternalTable(tbl)) {
                throw new MetaException("Table must be external.");
            }
        }

        @Override
        public void commitCreateTable(Table tbl) throws MetaException {
            // nothing to do
        }

        @Override
        public void preDropTable(Table tbl) throws MetaException {
            // nothing to do
        }

        @Override
        public void commitDropTable(Table tbl, boolean deleteData) throws MetaException {
            // nothing to do
        }

        @Override
        public void rollbackCreateTable(Table tbl) throws MetaException {
            // nothing to do
        }

        @Override
        public void rollbackDropTable(Table tbl) throws MetaException {
            // nothing to do
        }

    }

}
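
To make the property mirroring in configureJobProperties concrete, the following self-contained sketch runs the same loop over a hypothetical set of table properties. It uses only the JDK, and the property values are illustrative placeholders.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class PropertyMirrorDemo {
    public static void main(String[] args) {
        // hypothetical table properties, as a user might declare them in DDL
        Properties tblProps = new Properties();
        tblProps.setProperty("mapred.jdbc.driver.class", "org.h2.Driver");
        tblProps.setProperty("mapred.jdbc.url", "jdbc:h2:mem:demo");

        Map<String, String> jobProperties = new HashMap<String, String>();
        for (String key : tblProps.stringPropertyNames()) {
            if (key.startsWith("mapred.jdbc.")) {
                String value = tblProps.getProperty(key);
                jobProperties.put(key, value);
                // anchored replacement keeps later "mapred" substrings intact
                jobProperties.put(key.replaceFirst("^mapred\\.", "mapreduce."), value);
            }
        }
        // prints each setting under mapred.* and its mapreduce.* mirror
        for (Map.Entry<String, String> e : jobProperties.entrySet()) {
            System.out.println(e.getKey() + " = " + e.getValue());
        }
    }
}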