com.mongodb.hadoop.hive.MongoStorageHandler.java Source code

Introduction

Here is the source code for com.mongodb.hadoop.hive.MongoStorageHandler.java, the Hive storage handler from the MongoDB Connector for Hadoop (mongo-hadoop). It maps a MongoDB collection to a Hive table by supplying the input and output formats, the SerDe, a metastore hook, and predicate pushdown support.
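
As a quick orientation before the listing, here is a minimal sketch of registering a Hive table through this handler over HiveServer2's JDBC interface. The connection URL, table name, columns, and collection URI are all hypothetical, and the sketch assumes the hive-jdbc driver and the mongo-hadoop Hive jar are on the classpath; the 'mongo.uri' property it sets is the one validated in preCreateTable below.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class MongoHiveTableExample {
    public static void main(final String[] args) throws Exception {
        // Hypothetical HiveServer2 endpoint; adjust host/port/database.
        try (Connection conn =
                 DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
             Statement stmt = conn.createStatement()) {
            // STORED BY names the handler; 'mongo.uri' points at the
            // backing collection (here a hypothetical test.users).
            stmt.execute(
                "CREATE TABLE mongo_users (id STRING, name STRING, age INT) "
                    + "STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler' "
                    + "TBLPROPERTIES ('mongo.uri' = 'mongodb://localhost:27017/test.users')");
        }
    }
}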

Source

/*
 * Copyright 2010-2013 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.mongodb.hadoop.hive;

import com.mongodb.DBCollection;
import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.hive.input.HiveMongoInputFormat;
import com.mongodb.hadoop.hive.output.HiveMongoOutputFormat;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;

import static com.mongodb.hadoop.hive.BSONSerDe.MONGO_COLS;
import static java.lang.String.format;

/**
 * Hive storage handler that maps documents in a MongoDB collection to
 * rows in a Hive table, for both reads and writes.
 */
public class MongoStorageHandler extends DefaultStorageHandler implements HiveStoragePredicateHandler {
    // Table property holding the MongoDB URI of the backing collection.
    public static final String MONGO_URI = "mongo.uri";
    // Table property giving the location where metadata about the mongo
    // collection is stored.
    public static final String TABLE_LOCATION = "location";
    // Table property naming an optional properties file to read settings from.
    public static final String PROPERTIES_FILE_PATH = "mongo.properties.path";
    private Properties properties = null;

    private static final Log LOG = LogFactory.getLog(MongoStorageHandler.class);

    @Override
    public Class<? extends InputFormat<?, ?>> getInputFormatClass() {
        return HiveMongoInputFormat.class;
    }

    @Override
    public HiveMetaHook getMetaHook() {
        return new MongoHiveMetaHook();
    }

    @Override
    public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() {
        return HiveMongoOutputFormat.class;
    }

    @Override
    public Class<? extends SerDe> getSerDeClass() {
        return BSONSerDe.class;
    }

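    /**
     * Lazily reads the properties file at the given path and caches the
     * result; subsequent calls return the cached Properties even if a
     * different path is passed.
     */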
    private Properties getProperties(final Configuration conf, final String path) throws IOException {
        if (properties == null) {
            properties = MongoConfigUtil.readPropertiesFromFile(conf, path);
        }
        return properties;
    }

    @Override
    public DecomposedPredicate decomposePredicate(final JobConf jobConf, final Deserializer deserializer,
            final ExprNodeDesc predicate) {
        BSONSerDe serde = (BSONSerDe) deserializer;

        // Create a new analyzer capable of handling equality and general
        // binary comparisons; the 'false' argument means the analyzer is
        // not restricted to equality predicates.
        // TODO: The analyzer only handles binary comparison expressions,
        // but we could push down more than that in the future by writing
        // our own analyzer.
        IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
        // Predicate may contain any column.
        for (String colName : serde.columnNames) {
            analyzer.allowColumnName(colName);
        }
        List<IndexSearchCondition> searchConditions = new LinkedList<IndexSearchCondition>();
        ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);

        DecomposedPredicate decomposed = new DecomposedPredicate();
        decomposed.pushedPredicate = analyzer.translateSearchConditions(searchConditions);
        decomposed.residualPredicate = (ExprNodeGenericFuncDesc) residual;
        return decomposed;
    }
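    // Illustrative example (not part of the original source): for a query like
    //     SELECT * FROM mongo_users WHERE age > 21 AND name LIKE 'A%'
    // the analyzer accepts the binary comparison (age > 21) as a search
    // condition, which becomes the pushed predicate handed to the input
    // format, while LIKE is not a binary comparison and is returned as the
    // residual predicate for Hive to evaluate itself.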

    /**
     * HiveMetaHook that handles the metastore events fired when a Hive
     * table backed by this handler is created or dropped.
     */
    private class MongoHiveMetaHook implements HiveMetaHook {
        @Override
        public void preCreateTable(final Table tbl) throws MetaException {
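            // A table backed by this handler must say where its collection
            // lives: either directly via 'mongo.uri', or via a properties
            // file referenced by 'mongo.properties.path'.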
            Map<String, String> tblParams = tbl.getParameters();
            if (!(tblParams.containsKey(MONGO_URI) || tblParams.containsKey(PROPERTIES_FILE_PATH))) {
                throw new MetaException(
                        format("You must specify '%s' or '%s' in TBLPROPERTIES", MONGO_URI, PROPERTIES_FILE_PATH));
            }
        }

        @Override
        public void commitCreateTable(final Table tbl) throws MetaException {
        }

        @Override
        public void rollbackCreateTable(final Table tbl) throws MetaException {
        }

        @Override
        public void preDropTable(final Table tbl) throws MetaException {
        }

        @Override
        public void commitDropTable(final Table tbl, final boolean deleteData) throws MetaException {
            boolean isExternal = MetaStoreUtils.isExternalTable(tbl);

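            // Only managed (non-EXTERNAL) tables drop the backing MongoDB
            // collection; dropping an EXTERNAL table removes the Hive
            // metadata only and leaves the collection intact.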
            if (deleteData && !isExternal) {
                Map<String, String> tblParams = tbl.getParameters();
                DBCollection coll;
                if (tblParams.containsKey(MONGO_URI)) {
                    String mongoURIStr = tblParams.get(MONGO_URI);
                    coll = MongoConfigUtil.getCollection(new MongoClientURI(mongoURIStr));
                } else if (tblParams.containsKey(PROPERTIES_FILE_PATH)) {
                    String propertiesPathStr = tblParams.get(PROPERTIES_FILE_PATH);
                    Properties properties;
                    try {
                        properties = getProperties(getConf(), propertiesPathStr);
                    } catch (IOException e) {
                        throw new MetaException("Could not read properties file " + propertiesPathStr + ". Reason: "
                                + e.getMessage());
                    }
                    if (!properties.containsKey(MONGO_URI)) {
                        throw new MetaException("No URI given in properties file: " + propertiesPathStr);
                    }
                    String uriString = properties.getProperty(MONGO_URI);
                    coll = MongoConfigUtil.getCollection(new MongoClientURI(uriString));
                } else {
                    throw new MetaException(
                            format("Could not find properties '%s' or '%s'. " + "At least one must be defined. "
                                    + "Collection not dropped.", MONGO_URI, PROPERTIES_FILE_PATH));
                }
                try {
                    coll.drop();
                } finally {
                    MongoConfigUtil.close(coll.getDB().getMongo());
                }
            }
        }

        @Override
        public void rollbackDropTable(final Table tbl) throws MetaException {
        }
    }

    @Override
    public void configureInputJobProperties(final TableDesc tableDesc, final Map<String, String> jobProperties) {
        Properties properties = tableDesc.getProperties();
        copyJobProperties(properties, jobProperties);
    }

    @Override
    public void configureOutputJobProperties(final TableDesc tableDesc, final Map<String, String> jobProperties) {
        Properties properties = tableDesc.getProperties();
        copyJobProperties(properties, jobProperties);
    }

    /**
     * Copies MongoDB- and Hive-specific table properties into the job
     * configuration consumed by HiveMongoInputFormat, HiveMongoOutputFormat,
     * and BSONSerDe.
     */
    private void copyJobProperties(final Properties from, final Map<String, String> to) {
        // Copy Hive-specific properties used directly by
        // HiveMongoInputFormat, BSONSerDe.
        if (from.containsKey(serdeConstants.LIST_COLUMNS)) {
            to.put(serdeConstants.LIST_COLUMNS, (String) from.get(serdeConstants.LIST_COLUMNS));
        }
        if (from.containsKey(serdeConstants.LIST_COLUMN_TYPES)) {
            to.put(serdeConstants.LIST_COLUMN_TYPES, (String) from.get(serdeConstants.LIST_COLUMN_TYPES));
        }
        if (from.containsKey(MONGO_COLS)) {
            to.put(MONGO_COLS, (String) from.get(MONGO_COLS));
        }
        if (from.containsKey(TABLE_LOCATION)) {
            to.put(TABLE_LOCATION, (String) from.get(TABLE_LOCATION));
        }

        // First, merge properties from the given properties file, if there
        // was one. These can be overwritten by other table properties later.
        String propertiesFilePathString = from.getProperty(PROPERTIES_FILE_PATH);
        if (propertiesFilePathString != null) {
            try {
                Properties properties = getProperties(getConf(), propertiesFilePathString);
                for (Entry<Object, Object> prop : properties.entrySet()) {
                    String key = (String) prop.getKey();
                    String value = (String) prop.getValue();
                    if (key.equals(MONGO_URI)) {
                        // Copy to input/output URI.
                        to.put(MongoConfigUtil.INPUT_URI, value);
                        to.put(MongoConfigUtil.OUTPUT_URI, value);
                    } else {
                        to.put(key, value);
                    }
                }
            } catch (IOException e) {
                LOG.error("Error while trying to read properties file " + propertiesFilePathString, e);
            }
        }

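        // Note the ordering: values from the properties file are copied
        // first, so any 'mongo.*' key that also appears in TBLPROPERTIES is
        // overwritten by the table's own value in the loop below.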
        // Copy general connector properties, such as ones defined in
        // MongoConfigUtil. These are all prefixed with "mongo.".
        for (Entry<Object, Object> entry : from.entrySet()) {
            String key = (String) entry.getKey();
            if (key.startsWith("mongo.")) {
                to.put(key, (String) from.get(key));
            }
        }

        // Map the generic 'mongo.uri' table property onto the input and
        // output URI keys expected by MongoConfigUtil.
        if (from.containsKey(MONGO_URI)) {
            String mongoURIStr = (String) from.get(MONGO_URI);
            to.put(MongoConfigUtil.INPUT_URI, mongoURIStr);
            to.put(MongoConfigUtil.OUTPUT_URI, mongoURIStr);
        }
    }
}