edu.emory.bmi.datacafe.mongo.MongoConnector.java Source code

Java tutorial

Introduction

Here is the source code for edu.emory.bmi.datacafe.mongo.MongoConnector.java

Source

/*
 * Copyright (c) 2015-2016, Pradeeban Kathiravelu and others. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.emory.bmi.datacafe.mongo;

import com.mongodb.BasicDBObject;
import com.mongodb.Block;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.client.FindIterable;
import edu.emory.bmi.datacafe.conf.ConfigReader;
import edu.emory.bmi.datacafe.constants.MongoConstants;
import edu.emory.bmi.datacafe.core.DataCafeUtil;
import edu.emory.bmi.datacafe.core.conf.DatacafeConstants;
import edu.emory.bmi.datacafe.core.conf.QueryWrapper;
import edu.emory.bmi.datacafe.core.kernel.AbstractDataSourceConnector;
import edu.emory.bmi.datacafe.core.kernel.DataSourcesRegistry;
import edu.emory.bmi.datacafe.hazelcast.HzServer;
import edu.emory.bmi.datacafe.hdfs.HiveConnector;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.bson.Document;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Data source connector for MongoDB.
 *
 * Reads IDs and attribute values from Mongo collections and, while doing so, records the attributes and
 * data sources it has seen in Hazelcast multi-maps through {@link HzServer}.
 */
public class MongoConnector extends AbstractDataSourceConnector {
    private static final Logger logger = LogManager.getLogger(MongoConnector.class.getName());

    /** ID of the data lake this connector writes metadata for; {@code null} when none was given. */
    private final String datalakeID;

    /**
     * Creates a connector that is not associated with any data lake ID.
     */
    public MongoConnector() {
        this.datalakeID = null;
    }

    /**
     * Creates a connector for the given data lake and records the data lake ID in the
     * {@code DATALAKES_META_MAP} multi-map under the {@code DATALAKES_NAMES} key.
     *
     * @param datalakeID the ID of the data lake
     */
    public MongoConnector(String datalakeID) {
        HzServer.addValueToMultiMap(DatacafeConstants.DATALAKES_META_MAP, DatacafeConstants.DATALAKES_NAMES,
                datalakeID);
        this.datalakeID = datalakeID;
    }

    /**
     * Gets the list of IDs. Default id, _id is used.
     *
     * @param iterable the collection iterable
     * @return the list of IDs.
     */
    public List<Object> getAllIDs(FindIterable<Document> iterable) {
        return getAllIDs(iterable, MongoConstants.ID_ATTRIBUTE);
    }

    /**
     * Gets the list of IDs
     *
     * @param iterable    the collection iterable
     * @param idAttribute The attribute key that is used as the ID.
     * @return the list of IDs, one entry per document; an entry is null when the document lacks the attribute.
     */
    public List<Object> getAllIDs(FindIterable<Document> iterable, String idAttribute) {
        List<Object> idList = new ArrayList<>();
        // The cast selects the driver's forEach(Block) overload over Iterable.forEach(Consumer).
        iterable.forEach((Block<Document>) document -> idList.add(document.get(idAttribute)));
        if (logger.isDebugEnabled()) {
            idList.forEach(logger::debug);
        }
        return idList;
    }

    @Override
    public List getAllIDs(String database, String collection, String idAttribute) {
        MongoCollection mongoCollection = new MongoCollection(database, collection);
        return getAllIDs(mongoCollection.iterateCollection(), idAttribute);
    }

    /**
     * Gets the list of default (_id) IDs of the documents selected by the given document.
     *
     * @param database   the data base
     * @param collection the collection in the data base
     * @param document   the document passed to {@code MongoCollection.getCollection} to select the entries
     * @return the list of IDs.
     */
    public List<Object> getIDs(String database, String collection, Document document) {
        MongoCollection mongoCollection = new MongoCollection(database, collection);
        return getAllIDs(mongoCollection.getCollection(document));
    }

    /**
     * Get only the values for a chosen sub set of attributes. Default MongoID, _id is assumed.
     *
     * @param database            the data base
     * @param collection          the collection in the data base
     * @param ids                 the list of ids.
     * @param preferredAttributes the attributes to be added.
     * @return one string per id, holding the values found for that id.
     */
    public List<String> getAttributeValues(String database, String collection, List ids,
            String[] preferredAttributes) {
        return getAttributeValues(database, collection, ids, MongoConstants.ID_ATTRIBUTE, preferredAttributes,
                null);
    }

    @Override
    public List<String> getAttributesWithHeader(String database, String collection, List ids, String idAttribute,
            String[] preferredAttributes) {
        List<String> attributes = new ArrayList<>();
        // First entry is the header line built from the preferred attribute names.
        attributes.add(getChosenAttributeNames(preferredAttributes));
        attributes.addAll(getAttributeValues(database, collection, ids, idAttribute, preferredAttributes,
                new String[] { idAttribute }));
        return attributes;
    }

    /**
     * Get the values of the preferred attributes, with the default MongoID attribute passed as the attribute
     * to be removed. A header is requested for the first id.
     *
     * @param database            the data base
     * @param collection          the collection in the data base
     * @param ids                 the list of ids.
     * @param preferredAttributes the attributes to be added.
     * @return one string per id, holding the values found for that id.
     */
    public List<String> getAttributeValuesExceptAutoGenMongoId(String database, String collection, List ids,
            String[] preferredAttributes) {
        return getAttributeValues(database, collection, ids, MongoConstants.ID_ATTRIBUTE, preferredAttributes,
                new String[] { MongoConstants.ID_ATTRIBUTE }, true);
    }

    /**
     * Get the values with no preferred attributes given, with the default MongoID attribute passed as the
     * attribute to be removed. A header is requested for the first id.
     *
     * @param database   the data base
     * @param collection the collection in the data base
     * @param ids        the list of ids.
     * @return one string per id, holding the values found for that id.
     */
    public List<String> getAllAttributeValuesExceptAutoGenMongoId(String database, String collection, List ids) {
        return getAttributeValues(database, collection, ids, MongoConstants.ID_ATTRIBUTE, null,
                new String[] { MongoConstants.ID_ATTRIBUTE }, true);
    }

    @Override
    public List<String> getAttributeValues(String database, String table, List ids, String idAttribute,
            String[] preferredAttributes) {
        return getAttributeValues(database, table, ids, idAttribute, preferredAttributes, null);
    }

    /**
     * Get only the values for a chosen sub set of attributes, without a header.
     *
     * @param database            the data base
     * @param collection          the collection in the data base
     * @param ids                 the list of ids.
     * @param idAttribute         The attribute key that is used as the ID.
     * @param preferredAttributes the attributes to be added.
     * @param removedAttributes   the attributes to be removed.
     * @return one string per id, holding the values found for that id.
     */
    public List<String> getAttributeValues(String database, String collection, List ids, String idAttribute,
            String[] preferredAttributes, String[] removedAttributes) {
        return getAttributeValues(database, collection, ids, idAttribute, preferredAttributes, removedAttributes,
                false);
    }

    /**
     * Get only the values for a chosen sub set of attributes.
     *
     * Before querying, the key set of one sample document of the collection is recorded in the Hazelcast
     * multi-maps as the attributes of this data source. If the collection holds no document at all, a
     * warning is logged, nothing is recorded, and an empty list is returned.
     *
     * @param database            the data base
     * @param collection          the collection in the data base
     * @param ids                 the list of ids.
     * @param idAttribute         The attribute key that is used as the ID.
     * @param preferredAttributes the attributes to be added.
     * @param removedAttributes   the attributes to be removed.
     * @param addHeader           should the headers be added. The header is requested only for the first id.
     * @return one trimmed string per id, holding the values found for that id.
     */
    public List<String> getAttributeValues(String database, String collection, List ids, String idAttribute,
            String[] preferredAttributes, String[] removedAttributes, boolean addHeader) {
        MongoCollection mongoCollection = new MongoCollection(database, collection);
        DBCollection dbCollection = mongoCollection.getCollection();
        List<String> rows = new ArrayList<>();

        // findOne() yields null for an empty collection; there is nothing to index or read in that case.
        DBObject sample = dbCollection.findOne();
        if (sample == null) {
            logger.warn("No documents found in {}.{}; returning no attribute values.", database, collection);
            return rows;
        }
        Set<String> keySet = sample.keySet();

        // Remove the mongo _id attribute.
        if (MongoConstants.IS_ID_ATTRIBUTE_RANDOM_GENERATED) {
            keySet.remove(MongoConstants.ID_ATTRIBUTE);
        }

        String destination = QueryWrapper.getDestinationInDataLakeFromDrill(database, collection);

        // NOTE(review): when datalakeID is null this map name starts with "null" - confirm that is intended.
        String metaIndicesMap = datalakeID + DatacafeConstants.META_INDICES_MULTI_MAP_SUFFIX;
        HzServer.addValuesToMultiMap(metaIndicesMap, DatacafeConstants.ATTRIBUTES_MAP_ENTRY_KEY, keySet);
        HzServer.addValueToMultiMap(metaIndicesMap, DatacafeConstants.DATASOURCES_MAP_ENTRY_KEY, destination);

        boolean hasDatalake = (datalakeID != null) && !datalakeID.trim().equals("");
        for (String key : keySet) {
            if (hasDatalake) {
                HzServer.addValueToMultiMap(datalakeID, key, destination);
            } else {
                HzServer.addValueToMultiMap(key, destination);
            }
        }

        boolean headerPending = addHeader;
        for (Object id : ids) {
            DBCursor results = dbCollection.find(new BasicDBObject(idAttribute, id),
                    MongoUtil.getDBObjFromAttributes(preferredAttributes, removedAttributes));
            try {
                String cursorValue = getCursorValues(
                        DataSourcesRegistry.constructFullDataSourceName(database, collection), results,
                        headerPending);
                // Only the first id is asked to produce the header.
                headerPending = false;
                rows.add(cursorValue.trim());
            } finally {
                // Release the cursor even when reading it fails part-way.
                results.close();
            }
        }
        return rows;
    }

    /**
     * Reads the cursor to its end and concatenates the values of every result into one string.
     *
     * When a header is requested, the attribute names of the current result are written, followed by a new
     * line, as long as the output built so far is still blank. At that point, if a Hive server is
     * configured, a query built from the attribute names is also written to Hive for the data source.
     *
     * @param fullDataSourceName the full data source name
     * @param results            the DBCursor
     * @param addHeader          Should a header with attributes be added.
     * @return the concatenated header (if any) and values; an empty string when the cursor has no results.
     */
    public String getCursorValues(String fullDataSourceName, DBCursor results, boolean addHeader) {
        StringBuilder outValue = new StringBuilder();

        while (results.hasNext()) {
            DBObject resultElement = results.next();
            Map resultElementMap = resultElement.toMap();

            if (addHeader && isBlank(outValue)) {
                Collection resultNames = resultElementMap.keySet();
                outValue.append(DataCafeUtil.constructStringFromCollection(resultNames));

                // Null is checked first so that a missing Hive configuration skips Hive instead of failing.
                String hiveServer = ConfigReader.getHiveServer();
                if ((hiveServer != null) && !hiveServer.equals("")) {
                    //start doing the Hive Things
                    String query = DataCafeUtil
                            .wrapTheQuery(DataCafeUtil.constructQueryFromCollection(resultNames));
                    HiveConnector hiveConnector = new HiveConnector(datalakeID);
                    hiveConnector.writeToHive(fullDataSourceName, query);
                }
                outValue.append("\n");
            }

            outValue.append(DataCafeUtil.constructStringFromCollection(resultElementMap.values()));
            if (logger.isDebugEnabled()) {
                logger.debug(outValue.toString());
            }
        }
        return outValue.toString();
    }

    /**
     * Reads the cursor to its end and concatenates the values of every result into one string, without a
     * header.
     *
     * @param fullDataSourceName the full data source name
     * @param results            the DBCursor
     * @return the concatenated values; an empty string when the cursor has no results.
     */
    public String getCursorValues(String fullDataSourceName, DBCursor results) {
        return getCursorValues(fullDataSourceName, results, false);
    }

    @Override
    public void closeConnections() {
        MongoCollection.close();
        // Logged after the close so the message is only emitted when the close did not throw.
        if (logger.isDebugEnabled()) {
            logger.debug("Successfully closed the Mongo connection.");
        }
    }

    /**
     * Tells whether the text would be empty after {@link String#trim()}, without copying it.
     */
    private static boolean isBlank(CharSequence text) {
        for (int i = 0; i < text.length(); i++) {
            if (text.charAt(i) > ' ') {
                return false;
            }
        }
        return true;
    }
}