com.stratio.deep.mongodb.extractor.MongoNativeExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.stratio.deep.mongodb.extractor.MongoNativeExtractor.java

Source

/*
 * Copyright 2014, Stratio.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.stratio.deep.mongodb.extractor;

import static com.stratio.deep.commons.utils.Utils.initConfig;
import static com.stratio.deep.commons.utils.Utils.removeAddressPort;
import static com.stratio.deep.mongodb.utils.UtilMongoDB.MONGO_DEFAULT_ID;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.spark.Partition;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.CommandResult;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.QueryBuilder;
import com.mongodb.ReadPreference;
import com.mongodb.ServerAddress;
import com.stratio.deep.commons.config.BaseConfig;
import com.stratio.deep.commons.config.DeepJobConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.exception.DeepGenericException;
import com.stratio.deep.commons.impl.DeepPartition;
import com.stratio.deep.commons.querybuilder.UpdateQueryBuilder;
import com.stratio.deep.commons.rdd.DeepTokenRange;
import com.stratio.deep.commons.rdd.IExtractor;
import com.stratio.deep.commons.utils.Pair;
import com.stratio.deep.mongodb.config.MongoDeepJobConfig;
import com.stratio.deep.mongodb.partition.MongoPartition;
import com.stratio.deep.mongodb.reader.MongoReader;
import com.stratio.deep.mongodb.writer.MongoWriter;

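/**
 * Base extractor that talks to MongoDB through the native Java driver: it computes the partitions of a
 * collection, iterates the documents of a partition through a {@link MongoReader} and saves entities through a
 * {@link MongoWriter}. Subclasses supply the conversion between {@link DBObject} and {@code T}.
 *
 * Created by rcrespo on 7/11/14.
 *
 * @param <T> the entity type returned by {@link #next()} and accepted by {@link #saveRDD(Object)}
 * @param <S> the configuration type handed to this extractor
 */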
public abstract class MongoNativeExtractor<T, S extends BaseConfig> implements IExtractor<T, S> {

    /**
     * Name of the field read from the {@code splitVector} command result to obtain the split keys.
     */
    public static final String SPLIT_KEYS = "splitKeys";

    /**
     * Serialization version identifier of this class.
     */
    private static final long serialVersionUID = -4020891863696443624L;

    /**
     * Value sent as {@code maxChunkSize} in the {@code splitVector} command.
     * NOTE(review): presumably expressed in megabytes, as MongoDB's splitVector expects - confirm.
     */
    private int splitSize = 10;

    /**
     * Reader used by {@code hasNext()} / {@code next()}; created in {@code initIterator}.
     */
    private MongoReader reader;

    /**
     * Writer used by {@code saveRDD}; created in {@code initSave}.
     */
    private MongoWriter writer;

    /**
     * Job configuration, (re)initialized from the caller-supplied config through {@code initConfig}.
     */
    protected MongoDeepJobConfig<T> mongoDeepJobConfig;

    /**
     * Computes the partitions of the configured collection.
     *
     * Connects to the configured host with read preference {@code nearest}, then delegates to the chunk-based
     * computation when the collection is sharded and to the split-key computation otherwise. The client opened
     * here is always closed before returning.
     *
     * @param config the extractor configuration
     * @return the partitions of the collection
     * @throws DeepGenericException if the configured host cannot be resolved
     */
    @Override
    public Partition[] getPartitions(S config) {
        MongoClient client = null;
        try {
            mongoDeepJobConfig = initConfig(config, mongoDeepJobConfig);

            // The driver is seeded with a one-element list holding the configured host.
            List<ServerAddress> seeds = Collections.singletonList(new ServerAddress(mongoDeepJobConfig.getHost()));
            client = new MongoClient(seeds);
            client.setReadPreference(ReadPreference.nearest());

            DBCollection target = client.getDB(mongoDeepJobConfig.getDatabase())
                    .getCollection(mongoDeepJobConfig.getCollection());

            if (isShardedCollection(target)) {
                return calculateShardChunks(target);
            }
            return calculateSplits(target);
        } catch (UnknownHostException e) {
            throw new DeepGenericException(e);
        } finally {
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Tells whether the collection is sharded.
     *
     * Looks in the {@code collections} collection of the {@code config} database for a document whose
     * {@code MONGO_DEFAULT_ID} field equals the collection's full name.
     *
     * @param collection the collection to check
     * @return {@code true} if such a document exists, {@code false} otherwise
     */
    private boolean isShardedCollection(DBCollection collection) {
        DBCollection shardedCollections = collection.getDB().getMongo().getDB("config")
                .getCollection("collections");
        BasicDBObject byFullName = new BasicDBObject(MONGO_DEFAULT_ID, collection.getFullName());
        return shardedCollections.findOne(byFullName) != null;
    }

    /**
     * Reads the shards registered in the {@code shards} collection of the {@code config} database.
     *
     * The {@code host} field of a shard is either {@code replicaSetName/host1:port,host2:port} or, for a shard
     * that is not a replica set, a plain {@code host:port}. Both forms are accepted: the optional replica-set
     * prefix is stripped and the remainder is split on commas. Shards without a usable {@code host} value are
     * skipped.
     *
     * @param collection any collection of the cluster; only used to reach the {@code config} database
     * @return a map from shard id (the {@code MONGO_DEFAULT_ID} field) to the addresses of its members
     */
    private Map<String, String[]> getShards(DBCollection collection) {
        DB config = collection.getDB().getSisterDB("config");
        DBCollection configShards = config.getCollection("shards");

        Map<String, String[]> map = new HashMap<>();

        DBCursor cursorShards = configShards.find();
        try {
            while (cursorShards.hasNext()) {
                DBObject currentShard = cursorShards.next();
                String currentHost = (String) currentShard.get("host");
                if (currentHost == null) {
                    continue;
                }
                // indexOf returns -1 when there is no replica-set prefix, so substring(0) keeps the whole value.
                String members = currentHost.substring(currentHost.indexOf('/') + 1);
                if (!members.isEmpty()) {
                    map.put((String) currentShard.get(MONGO_DEFAULT_ID), members.split(","));
                }
            }
        } finally {
            // Release the cursor even if iteration fails.
            cursorShards.close();
        }
        return map;
    }

    /**
     * Opens a cursor over the documents of the {@code chunks} collection of the {@code config} database whose
     * {@code ns} field equals the collection's full name.
     *
     * This method does not close the returned cursor.
     *
     * @param collection the collection whose chunks are requested
     * @return a cursor over the matching chunk documents
     */
    private DBCursor getChunks(DBCollection collection) {
        BasicDBObject byNamespace = new BasicDBObject("ns", collection.getFullName());
        return collection.getDB().getSisterDB("config").getCollection("chunks").find(byNamespace);
    }

    /**
     * Builds the partitions of a collection from the split keys returned by {@code splitVector}.
     *
     * The command is first run through the collection's own connection. When that yields no split keys, it is
     * retried directly against each shard, and the addresses of the shard that answered are used as replicas.
     * For {@code n} split keys, {@code n + 1} partitions are produced: each one starts at the previous split key
     * ({@code null} for the first one) and ends at the next split key ({@code null} for the last one).
     *
     * @param collection the collection to split
     * @return the partitions, in split-key order
     * @throws DeepGenericException if no split keys can be obtained from any source
     */
    private DeepPartition[] calculateSplits(DBCollection collection) {

        BasicDBList splitData = getSplitData(collection);
        List<ServerAddress> serverAddressList = collection.getDB().getMongo().getServerAddressList();

        if (splitData == null) {
            Pair<BasicDBList, List<ServerAddress>> pair = getSplitDataCollectionShardEnviroment(
                    getShards(collection), collection.getDB().getName(), collection.getName());
            // The fallback returns null when no shard produced split keys; fail with a clear message
            // instead of a NullPointerException further down.
            if (pair == null || pair.left == null) {
                throw new DeepGenericException(new IllegalStateException(
                        "Unable to obtain split keys for collection " + collection.getFullName()));
            }
            splitData = pair.left;
            serverAddressList = pair.right;
        }

        List<String> stringHosts = new ArrayList<>(serverAddressList.size());
        for (ServerAddress serverAddress : serverAddressList) {
            stringHosts.add(serverAddress.toString());
        }

        MongoPartition[] partitions = new MongoPartition[splitData.size() + 1];

        Object lastKey = null; // Lower boundary of the first split
        int i = 0;

        for (Object aSplitData : splitData) {
            Object currentO = ((BasicDBObject) aSplitData).get(MONGO_DEFAULT_ID);

            partitions[i] = new MongoPartition(mongoDeepJobConfig.getRddId(), i,
                    new DeepTokenRange(lastKey, currentO, stringHosts), MONGO_DEFAULT_ID);

            lastKey = currentO;
            i++;
        }

        // Trailing partition: from the last split key onwards. It carries the same rdd id as the others.
        partitions[i] = new MongoPartition(mongoDeepJobConfig.getRddId(), i,
                new DeepTokenRange(lastKey, null, stringHosts), MONGO_DEFAULT_ID);
        return partitions;
    }

    /**
     * Runs the {@code splitVector} command for the collection on the {@code admin} database and returns the
     * {@code SPLIT_KEYS} field of the result.
     *
     * The command is issued with a key pattern on {@code MONGO_DEFAULT_ID}, {@code force} set to {@code false}
     * and {@code maxChunkSize} set to {@code splitSize}.
     *
     * @param collection the collection to split
     * @return the value of the {@code SPLIT_KEYS} field of the command result, or {@code null} if the result
     *         has no such field
     */
    private BasicDBList getSplitData(DBCollection collection) {
        BasicDBObjectBuilder commandBuilder = BasicDBObjectBuilder.start("splitVector", collection.getFullName());
        commandBuilder.add("keyPattern", new BasicDBObject(MONGO_DEFAULT_ID, 1));
        commandBuilder.add("force", false);
        commandBuilder.add("maxChunkSize", splitSize);
        final DBObject splitVectorCommand = commandBuilder.get();

        DB adminDb = collection.getDB().getSisterDB("admin");
        CommandResult response = adminDb.command(splitVectorCommand);
        Object splitKeys = response.get(SPLIT_KEYS);
        return (BasicDBList) splitKeys;
    }

    /**
     * Asks each shard in turn for the split keys of a collection and returns the first answer.
     *
     * A dedicated client is opened per shard and closed as soon as that shard has been queried, so no
     * connection outlives this call.
     *
     * @param shards         the shards, as a map from shard id to member addresses
     * @param dbName         the db name
     * @param collectionName the collection name
     * @return the split keys together with the addresses of the shard that returned them, or {@code null} if no
     *         shard returned split keys
     * @throws DeepGenericException if a shard address cannot be resolved
     */
    private Pair<BasicDBList, List<ServerAddress>> getSplitDataCollectionShardEnviroment(
            Map<String, String[]> shards, String dbName, String collectionName) {
        try {
            for (String[] shardHosts : shards.values()) {
                if (shardHosts == null) {
                    continue;
                }

                List<ServerAddress> addressList = getServerAddressList(Arrays.asList(shardHosts));

                MongoClient mongoClient = new MongoClient(addressList);
                try {
                    BasicDBList dbList = getSplitData(mongoClient.getDB(dbName).getCollection(collectionName));

                    if (dbList != null) {
                        return Pair.create(dbList, addressList);
                    }
                } finally {
                    // Close this shard's client before moving on (or returning).
                    mongoClient.close();
                }
            }
        } catch (UnknownHostException e) {
            throw new DeepGenericException(e);
        }

        return null;

    }

    /**
     * Returns the replicas recorded in the partition's token range after passing them through
     * {@code removeAddressPort}.
     *
     * @param split the partition; must be a {@link DeepPartition}
     * @return the result of {@code removeAddressPort} applied to the partition's replicas
     */
    @Override
    public List<String> getPreferredLocations(Partition split) {
        DeepPartition deepPartition = (DeepPartition) split;
        return removeAddressPort(deepPartition.splitWrapper().getReplicas());
    }

    /**
     * Builds one partition per chunk of a sharded collection.
     *
     * Each partition covers the chunk's {@code min}..{@code max} range and carries the member addresses of the
     * shard named in the chunk's {@code shard} field. The field used as partition key is the last field name
     * of the first non-empty {@code min} document encountered. The partitions are returned in random order.
     *
     * Partitions are accumulated in a list rather than in an array sized by a separate count query, so the
     * result always matches what the cursor actually returned. The chunk cursor is always closed.
     *
     * @param collection the sharded collection
     * @return the partitions, shuffled
     */
    private DeepPartition[] calculateShardChunks(DBCollection collection) {

        Map<String, String[]> shards = getShards(collection);
        List<MongoPartition> mongoPartitions = new ArrayList<>();

        DBCursor chuncks = getChunks(collection);
        try {
            String key = null;
            while (chuncks.hasNext()) {
                DBObject dbObject = chuncks.next();
                DBObject min = (DBObject) dbObject.get("min");
                DBObject max = (DBObject) dbObject.get("max");

                if (key == null) {
                    // Keeps the last field name of the "min" document.
                    for (String s : min.keySet()) {
                        key = s;
                    }
                }

                mongoPartitions.add(new MongoPartition(
                        mongoDeepJobConfig.getRddId(), mongoPartitions.size(),
                        new DeepTokenRange(shards.get(dbObject.get("shard")), min.get(key), max.get(key)),
                        key));
            }
        } finally {
            // Release the cursor even if building a partition fails.
            chuncks.close();
        }

        // NOTE(review): the second constructor argument is assigned before shuffling, so it no longer matches
        // the element's position in the returned array - confirm this is intended.
        Collections.shuffle(mongoPartitions);
        return mongoPartitions.toArray(new MongoPartition[mongoPartitions.size()]);
    }

    /**
     * Converts address strings into {@link ServerAddress} instances, preserving their order.
     *
     * @param addressStringList the addresses, each in a form accepted by the {@link ServerAddress} constructor
     * @return a new list with one {@link ServerAddress} per input string
     * @throws UnknownHostException if one of the addresses cannot be resolved
     */
    private List<ServerAddress> getServerAddressList(List<String> addressStringList) throws UnknownHostException {
        List<ServerAddress> resolved = new ArrayList<>(addressStringList.size());
        for (String rawAddress : addressStringList) {
            ServerAddress serverAddress = new ServerAddress(rawAddress);
            resolved.add(serverAddress);
        }
        return resolved;
    }

    /**
     * Tells whether the reader has more documents.
     *
     * @return whatever the reader's {@code hasNext()} reports
     */
    @Override
    public boolean hasNext() {
        boolean moreAvailable = reader.hasNext();
        return moreAvailable;
    }

    /**
     * Fetches the next document from the reader and converts it to an entity.
     *
     * @return the entity produced by {@code transformElement} for the next document
     */
    @Override
    public T next() {
        DBObject document = reader.next();
        return transformElement(document);
    }

    /**
     * Closes the reader and the writer, whichever of them have been created.
     *
     * The writer is closed even when closing the reader throws; in that case the reader's exception still
     * propagates to the caller.
     */
    @Override
    public void close() {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Prepares this extractor to iterate over a partition.
     *
     * Re-initializes the job configuration, then creates a new reader and initializes it with the partition.
     *
     * @param dp     the partition to read
     * @param config the extractor configuration
     */
    @Override
    public void initIterator(Partition dp, S config) {
        this.mongoDeepJobConfig = initConfig(config, this.mongoDeepJobConfig);

        // The field is assigned before init() is called on the new reader.
        this.reader = new MongoReader(this.mongoDeepJobConfig);
        this.reader.init(dp);
    }

    /**
     * Converts the entity to a document and hands it to the writer.
     *
     * @param entity the entity to save
     */
    @Override
    public void saveRDD(T entity) {
        DBObject document = transformElement(entity);
        writer.save(document);
    }

    /**
     * Prepares this extractor for saving.
     *
     * Re-initializes the job configuration and creates the writer from the configured host list, database,
     * collection and write concern. The {@code first} and {@code queryBuilder} arguments are not used here.
     *
     * @param config       the extractor configuration
     * @param first        not used by this implementation
     * @param queryBuilder not used by this implementation
     * @throws DeepGenericException if one of the configured hosts cannot be resolved
     */
    @Override
    public void initSave(S config, T first, UpdateQueryBuilder queryBuilder) {
        mongoDeepJobConfig = initConfig(config, mongoDeepJobConfig);

        try {
            List<ServerAddress> targets = getServerAddressList(mongoDeepJobConfig.getHostList());
            String databaseName = mongoDeepJobConfig.getDatabase();
            String collectionName = mongoDeepJobConfig.getCollection();
            writer = new MongoWriter(targets, databaseName, collectionName,
                    mongoDeepJobConfig.getWriteConcern());
        } catch (UnknownHostException e) {
            throw new DeepGenericException(e);
        }
    }

    /**
     * Converts a document obtained from the reader into an entity; used by {@code next()}.
     *
     * @param dbObject the document to convert
     * @return the entity built from the document
     */
    protected abstract T transformElement(DBObject dbObject);

    /**
     * Converts an entity into the document handed to the writer; used by {@code saveRDD}.
     *
     * @param entity the entity to convert
     * @return the document built from the entity
     */
    protected abstract DBObject transformElement(T entity);

}