it.polimi.hegira.adapters.datastore.Datastore.java Source code

Java tutorial

Introduction

Here is the source code for it.polimi.hegira.adapters.datastore.Datastore.java

Source

/**
 * Copyright 2015 Marco Scavuzzo
 * Contact: Marco Scavuzzo <marco.scavuzzo@polimi.it>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package it.polimi.hegira.adapters.datastore;

import java.io.IOException;
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;

import com.google.appengine.api.datastore.Cursor;
import com.google.appengine.api.datastore.DatastoreService;
import com.google.appengine.api.datastore.DatastoreServiceFactory;
import com.google.appengine.api.datastore.Entities;
import com.google.appengine.api.datastore.FetchOptions;
import com.google.appengine.api.datastore.Key;
import com.google.appengine.api.datastore.KeyFactory;
import com.google.appengine.api.datastore.KeyRange;
import com.google.appengine.api.datastore.PreparedQuery;
import com.google.appengine.api.datastore.Query;
import com.google.appengine.api.datastore.QueryResultList;
import com.google.appengine.tools.remoteapi.RemoteApiInstaller;
import com.google.appengine.tools.remoteapi.RemoteApiOptions;
import com.google.appengine.api.datastore.Entity;
import com.rabbitmq.client.ConsumerCancelledException;
import com.rabbitmq.client.QueueingConsumer.Delivery;
import com.rabbitmq.client.ShutdownSignalException;

import it.polimi.hegira.adapters.AbstractDatabase;
import it.polimi.hegira.exceptions.ConnectException;
import it.polimi.hegira.exceptions.QueueException;
import it.polimi.hegira.models.DatastoreModel;
import it.polimi.hegira.models.Metamodel;
import it.polimi.hegira.queue.TaskQueue;
import it.polimi.hegira.transformers.DatastoreTransformer;
import it.polimi.hegira.utils.Constants;
import it.polimi.hegira.utils.DefaultErrors;
import it.polimi.hegira.utils.PropertiesManager;
import it.polimi.hegira.vdp.VdpUtils;
import it.polimi.hegira.zkWrapper.MigrationStatus.VDPstatus;
import it.polimi.hegira.zkWrapper.ZKclient;
import it.polimi.hegira.zkWrapper.ZKserver;
import it.polimi.hegira.zkWrapper.statemachine.State;

public class Datastore extends AbstractDatabase {
    private static Logger log = Logger.getLogger(Datastore.class);

    /**
     * Per-thread connection state: the Remote API installer plus the
     * DatastoreService obtained through it. Declared static because it never
     * references the enclosing Datastore instance.
     */
    private static class ConnectionObject {
        protected RemoteApiInstaller installer;
        protected DatastoreService ds;

        public ConnectionObject() {
        }

        public ConnectionObject(RemoteApiInstaller installer, DatastoreService ds) {
            this.installer = installer;
            this.ds = ds;
        }
    }

    /**
     * One ConnectionObject per worker thread, indexed by {@link #threadId()}.
     * Pre-filled with empty placeholders by the constructor so each thread can
     * later install its own connection at a fixed index.
     */
    private ArrayList<ConnectionObject> connectionList;

    /**
     * Creates the adapter and pre-allocates one (empty) connection slot per
     * worker thread, or a single slot when no worker threads are configured.
     * @param options adapter configuration, forwarded to {@link AbstractDatabase}.
     */
    public Datastore(Map<String, String> options) {
        super(options);
        int slots = (TWTs_NO > 0) ? TWTs_NO : 1;
        connectionList = new ArrayList<ConnectionObject>(slots);
        for (int i = 0; i < slots; i++)
            connectionList.add(new ConnectionObject());
    }

    /**
     * Maps the current thread onto its slot in {@code connectionList} and in
     * {@code taskQueues}. Returns 0 when no worker threads are configured.
     */
    private int threadId() {
        return (TWTs_NO != 0) ? (int) (Thread.currentThread().getId() % TWTs_NO) : 0;
    }

    /**
     * TWC side: consumes serialized {@link Metamodel} entities from this
     * thread's task queue, transforms them to Datastore entities and writes
     * them in batches. A full batch is flushed every 100 deliveries; a partial
     * batch is flushed as soon as the queue is idle (2s poll timeout).
     * Bug fix: the original flushed (and cleared) the batch on every delivery,
     * because the partial-flush branch was reachable while a delivery was being
     * processed, so the 100-entity batching never took effect.
     * @param mm unused; data is read from the task queue.
     * @return never returns normally (infinite consumer loop).
     */
    @Override
    protected AbstractDatabase fromMyModel(Metamodel mm) {
        List<Entity> batch = new ArrayList<Entity>();
        TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
        long k = 0;
        int thread_id = threadId();
        while (true) {
            log.debug(Thread.currentThread().getName() + " Extracting from the taskQueue" + thread_id + " TWTs_NO: "
                    + TWTs_NO);

            try {
                Delivery delivery = taskQueues.get(thread_id).getConsumer().nextDelivery(2000);
                if (delivery != null) {
                    Metamodel myModel = new Metamodel();
                    deserializer.deserialize(myModel, delivery.getBody());

                    DatastoreTransformer dt = new DatastoreTransformer(connectionList.get(thread_id).ds);
                    DatastoreModel fromMyModel = dt.fromMyModel(myModel);

                    batch.add(fromMyModel.getEntity());
                    batch.add(fromMyModel.getFictitiousEntity());

                    taskQueues.get(thread_id).sendAck(delivery.getEnvelope().getDeliveryTag());
                    k++;

                    // Flush a full batch every 100 consumed messages.
                    if (k % 100 == 0) {
                        putBatch(batch);
                        log.debug(Thread.currentThread().getName() + " ===>100 entities. putting normal batch");
                        batch = new ArrayList<Entity>();
                    }
                } else if (!batch.isEmpty()) {
                    // Nothing in the queue for me: flush the partial batch so
                    // the tail of the migration is not left unwritten.
                    putBatch(batch);
                    log.debug(Thread.currentThread().getName()
                            + " ===>less than 100 entities. putting short batch");
                    batch = new ArrayList<Entity>();
                    k = 0;
                }
            } catch (ShutdownSignalException | ConsumerCancelledException | InterruptedException e) {
                // Restore the interrupt status so callers up the stack can observe it.
                if (e instanceof InterruptedException)
                    Thread.currentThread().interrupt();
                log.error("Error consuming from the queue " + TaskQueue.getDefaultTaskQueueName(), e);
            } catch (TException e) {
                log.error("Error deserializing", e);
            } catch (QueueException e) {
                log.error("Couldn't send the ack to the queue " + TaskQueue.getDefaultTaskQueueName(), e);
            }
        }
    }

    /**
     * SRC side (non-partitioned): scans every kind, pages through its entities
     * with a Datastore cursor (300 per page), maps each entity to the Metamodel
     * and publishes it on the task queue.
     * @param db the source adapter; must be a {@link Datastore}.
     * @return always {@code null} (results are streamed to the queue).
     */
    @Override
    protected Metamodel toMyModel(AbstractDatabase db) {
        Datastore datastore = (Datastore) db;
        List<String> kinds = datastore.getAllKinds();
        int thread_id = 0;

        for (String kind : kinds) {
            long i = 0;
            //Create a new instance of the Thrift Serializer
            TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());
            //Datastore cursor to scan a kind
            Cursor cursor = null;

            while (true) {
                QueryResultList<Entity> results = datastore.getEntitiesByKind_withCursor(kind, cursor, 300);

                /**
                 * newcursor is null if the query result cannot be resumed;
                 * newcursor is equal to cursor if all entities have been read.
                 */
                Cursor newcursor = getNextCursor(results);
                if (newcursor == null || newcursor.equals(cursor))
                    break;
                cursor = newcursor;

                for (Entity entity : results) {
                    DatastoreModel dsModel = new DatastoreModel(entity);
                    dsModel.setAncestorString(entity.getKey().toString());
                    DatastoreTransformer dt = new DatastoreTransformer();
                    Metamodel myModel = dt.toMyModel(dsModel);

                    if (myModel != null) {
                        try {
                            taskQueues.get(thread_id).publish(serializer.serialize(myModel));
                            i++;
                        } catch (QueueException | TException e) {
                            log.error("Serialization Error: ", e);
                        }
                    }
                }
                log.debug(Thread.currentThread().getName() + " Produced: " + i + " entities");

                // Throttle the producer every 5000 published entities.
                // Guard i > 0: 0 % 5000 == 0 would throttle before any work is done.
                if (i > 0 && i % 5000 == 0)
                    taskQueues.get(0).slowDownProduction();
            }
            log.debug(Thread.currentThread().getName() + " ==> Transferred " + i + " entities of kind " + kind);
        }
        return null;
    }

    /**
     * Returns the next cursor relative to the given list of entities, retrying
     * with exponential backoff (100ms doubling up to 5s, at most 10 attempts)
     * to work around undocumented Datastore errors.
     * @param results The list of entities.
     * @return The next cursor. <code>null</code> if the query result cannot be resumed;
     */
    private Cursor getNextCursor(QueryResultList<Entity> results) {
        //trying to minimize undocumented errors from the Datastore
        boolean proofCursor = true;
        int timeout_ms = 100, retries = 10;

        Cursor newcursor = null;
        while (proofCursor && retries > 0) {
            try {
                newcursor = results.getCursor();
                proofCursor = false;
            } catch (Exception e) {
                log.error("\n\n\n\n\t\tUndocumented Error !!! " + e.getMessage() + "\n\n\n");
                try {
                    Thread.sleep(timeout_ms);
                    if (timeout_ms < 5000)
                        timeout_ms *= 2;
                } catch (InterruptedException e1) {
                    // Preserve the interrupt status and stop retrying.
                    Thread.currentThread().interrupt();
                    proofCursor = false;
                }
                retries--;
            }
        }
        return newcursor;
    }

    /**
     * Installs the Remote API for the current thread and stores the resulting
     * connection in this thread's slot of {@code connectionList}.
     * @throws ConnectException if already connected or if the Remote API
     *         installation fails.
     */
    @Override
    public void connect() throws ConnectException {
        int thread_id = threadId();
        if (!isConnected()) {
            String username = PropertiesManager.getCredentials(Constants.DATASTORE_USERNAME);
            String password = PropertiesManager.getCredentials(Constants.DATASTORE_PASSWORD);
            String server = PropertiesManager.getCredentials(Constants.DATASTORE_SERVER);
            RemoteApiOptions options = new RemoteApiOptions().server(server, 443).credentials(username, password);
            try {
                log.debug(Thread.currentThread().getName() + " - Logging into " + server);
                RemoteApiInstaller installer = new RemoteApiInstaller();
                installer.install(options);
                DatastoreService ds = DatastoreServiceFactory.getDatastoreService();
                ConnectionObject co = new ConnectionObject(installer, ds);
                // set(), not add(int, E): replace this thread's placeholder instead
                // of inserting, which would shift every other thread's connection
                // one slot to the right and grow the list unboundedly.
                connectionList.set(thread_id, co);
                log.debug(Thread.currentThread().getName() + " - Added connection object at " + "position: "
                        + connectionList.indexOf(co) + " ThreadId%THREAD_NO=" + thread_id);
            } catch (IOException e) {
                // Pass the Throwable to log4j so the full stack trace is logged
                // (concatenating e.getStackTrace() only printed an array reference).
                log.error(DefaultErrors.connectionError, e);
                throw new ConnectException(DefaultErrors.connectionError);
            }
        } else {
            log.warn(DefaultErrors.alreadyConnected);
            throw new ConnectException(DefaultErrors.alreadyConnected);
        }
    }

    /**
     * Checks if a connection has already been established for the current thread.
     * @return true if connected, false if not.
     */
    public boolean isConnected() {
        int thread_id = threadId();
        try {
            ConnectionObject co = connectionList.get(thread_id);
            return co.installer != null && co.ds != null;
        } catch (IndexOutOfBoundsException e) {
            return false;
        }
    }

    /**
     * Uninstalls the Remote API for the current thread and clears this
     * thread's connection slot. Logs a warning when not connected.
     */
    @Override
    public void disconnect() {
        int thread_id = threadId();
        if (isConnected()) {
            ConnectionObject co = connectionList.get(thread_id);
            if (co.installer != null)
                co.installer.uninstall();
            co.installer = null;
            co.ds = null;
            log.debug(Thread.currentThread().getName() + " Disconnected");
        } else {
            log.warn(DefaultErrors.notConnected);
        }
    }

    /**
     * Stores a List of {@link com.google.appengine.api.datastore.Entity} in batch,
     * retrying indefinitely on {@link ConcurrentModificationException} (thrown by
     * the Datastore on contention for the same entity group).
     * @param batch the entities to persist.
     */
    private void putBatch(List<Entity> batch) {
        int thread_id = threadId();
        boolean proof = true;
        while (proof) {
            try {
                connectionList.get(thread_id).ds.put(batch);
                proof = false;
            } catch (ConcurrentModificationException ex) {
                log.error(ex.getMessage() + "...retry");
            }
        }
    }

    /**
     * Fetches entities by their numeric ids. For each id both a String-named
     * key and a long-id key are built, since the client application may have
     * created the entity either way.
     * @param keys the numeric ids to look up.
     * @param kind the entity kind.
     * @return a map from found keys to entities (absent keys are omitted).
     */
    private Map<Key, Entity> getEntitiesByKeys(List<Integer> keys, String kind) {
        int thread_id = threadId();
        //building Datastore keys
        ArrayList<Key> dKeys = new ArrayList<Key>(keys.size() * 2);
        for (Integer ik : keys) {
            // Both key encodings: name-based ("42") and id-based (42L).
            dKeys.add(KeyFactory.createKey(kind, ik.toString()));
            dKeys.add(KeyFactory.createKey(kind, ik.longValue()));
        }
        //querying for the given keys
        return connectionList.get(thread_id).ds.get(dKeys);
    }

    /**
     * Uses Datastore SDK's KeyRange class to efficiently get a range of entities.
     * @param start The id of the first entity in the range.
     * @param end The id of the last entity in the range.
     * @param kind The kind of the entities to retrieve.
     * @return The query result, i.e. the entities associated with the KeyRange.
     */
    private Map<Key, Entity> getEntitiesByKeyRange(long start, long end, String kind) {
        int thread_id = threadId();
        //building Datastore keys
        KeyRange dKeys = new KeyRange(null, kind, start, end);
        //querying for the given keys
        return connectionList.get(thread_id).ds.get(dKeys);
    }

    /**
     * Query for a given entity type.
     * @param kind The kind used for the retrieval.
     * @return An iterable containing all the entities.
     */
    private Iterable<Entity> getEntitiesByKind(String kind) {
        int thread_id = threadId();
        Query q = new Query(kind);
        PreparedQuery pq = connectionList.get(thread_id).ds.prepare(q);
        return pq.asIterable();
    }

    /**
     * Gets a batch of entities of a given kind.
     * @param kind The Entity Kind.
     * @param cursor The point where to start fetching entities (<code>null</code> to fetch from the beginning). Could be extracted from the returned object.
     * @param pageSize The number of entities to be retrieved in each batch (maximum 300).
     * @return An object containing the entities, the cursor and other stuff.
     */
    private QueryResultList<Entity> getEntitiesByKind_withCursor(String kind, Cursor cursor, int pageSize) {
        int thread_id = threadId();
        boolean proof = true;
        QueryResultList<Entity> results = null;
        /**
         * Bullet proof reads from the Datastore: retry until the page is read.
         */
        while (proof) {
            try {
                FetchOptions fetchOptions = FetchOptions.Builder.withLimit(pageSize);
                if (cursor != null)
                    fetchOptions.startCursor(cursor);
                Query q = new Query(kind);
                PreparedQuery pq = connectionList.get(thread_id).ds.prepare(q);
                results = pq.asQueryResultList(fetchOptions);
                proof = false;
            } catch (Exception e) {
                log.error(Thread.currentThread().getName() + "ERROR: getEntitiesByKind_withCursor -> "
                        + e.getMessage());
            }
        }
        return results;
    }

    /**
     * Gets all the entities descending (even not directly connected) from the given key.
     * @param ancestorKey get descendents of this key.
     * @return an iterable over the descendant entities.
     */
    private Iterable<Entity> getDescentents(Key ancestorKey) {
        int thread_id = threadId();
        Query q = new Query().setAncestor(ancestorKey);
        PreparedQuery pq = connectionList.get(thread_id).ds.prepare(q);
        return pq.asIterable();
    }

    /**
     * All entities kinds contained in the Datastore, excluding statistic ones
     * (whose names start with an underscore).
     * @return A list containing all the kinds.
     */
    public List<String> getAllKinds() {
        int thread_id = threadId();
        Iterable<Entity> results = connectionList.get(thread_id).ds.prepare(new Query(Entities.KIND_METADATA_KIND))
                .asIterable();
        //list containing kinds of the root entities
        ArrayList<String> kinds = new ArrayList<String>();
        for (Entity globalStat : results) {
            String name = globalStat.getKey().getName();
            // Skip Datastore-internal/statistics kinds ("_...").
            if (!name.startsWith("_")) {
                kinds.add(name);
            }
        }
        return kinds;
    }

    /**
     * Checks if an entity is root (i.e. it hasn't any parent).
     * @param e The entity to be checked.
     * @return <code>true</code> if the given entity is root, <code>false</code> otherwise;
     */
    private boolean isRoot(Entity e) {
        return e.getKey().getParent() == null;
    }

    /**
     * SRC side (partitioned): migrates entities kind by kind, VDP by VDP,
     * coordinating the per-VDP migration state through the local snapshot and
     * ZooKeeper. Entities of each VDP are fetched by key range, mapped to the
     * Metamodel (with the actual VDP size piggybacked) and published on the
     * task queue. Empty VDPs are immediately marked as migrated.
     * @param db the source adapter; must be a {@link Datastore}.
     * @return {@code null} on normal completion AND on error (errors are logged).
     */
    @Override
    protected Metamodel toMyModelPartitioned(AbstractDatabase db) {
        Datastore datastore = (Datastore) db;
        Set<String> kinds = snapshot.keySet();
        //TODO: removing Fabio test kind. Remeber to remove in final version
        kinds.remove("usertable");
        int thread_id = 0;

        for (String kind : kinds) {
            long i = 0;
            //Create a new instance of the Thrift Serializer
            TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());

            //retrieving the total number of entities written so far for this kind.
            int maxSeq = snapshot.get(kind).getLastSeqNr();
            //calculating the total number of VDPs for this kind
            int totalVDPs = snapshot.get(kind).getTotalVDPs(vdpSize);

            //extracting entities per each VDP
            for (int VDPid = 0; VDPid < totalVDPs; VDPid++) {
                try {
                    //Trying to reduce concurrency on ZooKeeper, better having it on local snapshot
                    if (snapshot.get(kind).getVDPstatus(VDPid).getCurrentState().equals(State.NOT_MIGRATED)) {
                        if (canMigrate(kind, VDPid)) {
                            //computing the id boundaries of this VDP
                            int[] vdpExtremes = VdpUtils.getVdpExtremes(VDPid, maxSeq, vdpSize);
                            long start = vdpExtremes[0];
                            long end = vdpExtremes[1];
                            // Datastore ids start at 1; id 0 is never valid.
                            if (VDPid == 0 && start == 0)
                                start = 1;

                            log.debug(Thread.currentThread().getName() + " Getting entities for VDP: " + kind + "/"
                                    + VDPid);

                            //getting entities from the Datastore
                            Map<Key, Entity> result;
                            if (end > 0)
                                result = datastore.getEntitiesByKeyRange(start, end, kind);
                            else
                                result = new HashMap<Key, Entity>();

                            //getting the effective #entities to be piggybacked with every Metamodel entity
                            int actualEntitiesNumber = result.size();

                            //Mapping entities to the Metamodel and sending it to the queue.
                            for (Entity entity : result.values()) {
                                DatastoreModel dsModel = new DatastoreModel(entity);
                                dsModel.setAncestorString(entity.getKey().toString());
                                DatastoreTransformer dt = new DatastoreTransformer();
                                Metamodel myModel = dt.toMyModel(dsModel);

                                // Null-check BEFORE dereferencing: the original set the
                                // piggybacked counters on myModel before testing for null,
                                // throwing NPE whenever the transformer returned null.
                                if (myModel != null) {
                                    //Piggybacking the actual number of entities the TWC should expect.
                                    HashMap<String, Integer> counters = new HashMap<String, Integer>();
                                    counters.put(entity.getKind(), actualEntitiesNumber);
                                    myModel.setActualVdpSize(counters);
                                    try {
                                        taskQueues.get(thread_id).publish(serializer.serialize(myModel));
                                        i++;
                                    } catch (QueueException | TException e) {
                                        log.error(Thread.currentThread().getName() + " Serialization Error: ", e);
                                    }
                                }
                            }
                            log.debug(Thread.currentThread().getName() + " Total Produced entities: " + i
                                    + ". Entities from VDPid " + VDPid + ": " + actualEntitiesNumber);

                            //in the event that the client application requested too many ids, so that an entire VDP is empty,
                            //or in the case the client application has removed all entities in a VDP...
                            //there's no reason why that VDP should figure as "NOT_MIGRATED"
                            if (actualEntitiesNumber == 0) {
                                try {
                                    while (!notifyFinishedMigration(kind, VDPid)) {
                                        log.debug(Thread.currentThread().getName() + " I currently can't set VDP "
                                                + VDPid + " to migrated");
                                        Thread.sleep(300);
                                    }
                                } catch (Exception e) {
                                    log.error(Thread.currentThread().getName()
                                            + " Error setting the final migration status for kind: " + kind
                                            + " VDP: " + VDPid, e);
                                    return null;
                                }
                            }

                            // Throttle every 5000 published entities (guard i > 0:
                            // 0 % 5000 == 0 would throttle before any work is done).
                            if (i > 0 && i % 5000 == 0)
                                taskQueues.get(0).slowDownProduction();
                        }
                        // else: another migrator owns this VDP; skip it.
                    }
                    // else: VDP already migrated (or in progress) per local snapshot; skip.
                } catch (Exception e) {
                    log.error(Thread.currentThread().getName()
                            + " Error setting the initial migration status for kind: " + kind, e);
                    return null;
                }
            }
            //Finish all assigned vdps for this kind
            log.debug(Thread.currentThread().getName() + " ==> Transferred " + i + " entities of kind " + kind);
        }
        return null;
    }

    /**
     * {@inheritDoc}
     * @return the list of all non-statistic kinds, see {@link #getAllKinds()}.
     */
    @Override
    public List<String> getTableList() {
        return getAllKinds();
    }

    /**
     * Partitioned consumption is not implemented for the Datastore adapter.
     * @return always {@code null}.
     */
    @Override
    protected AbstractDatabase fromMyModelPartitioned(Metamodel mm) {
        // TODO Auto-generated method stub
        return null;
    }
}