io.cloudex.cloud.impl.google.GoogleCloudServiceImpl.java Source code

Introduction

Here is the source code for io.cloudex.cloud.impl.google.GoogleCloudServiceImpl.java
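
GoogleCloudServiceImpl implements the cloudex CloudService API on top of the Google Compute Engine, Cloud Storage and BigQuery client libraries. A minimal usage sketch follows; it assumes the service has already been wired with an AuthenticationProvider (and, when running outside the Google cloud, with a project id, zone, instance id and scopes), and the file name, bucket and SQL shown are purely hypothetical:

    GoogleCloudService service = new GoogleCloudServiceImpl();
    // wiring of the authentication provider and remote settings is omitted here
    VmMetaData metaData = service.init();

    // upload a local file and block until it has been stored
    service.uploadFileToCloudStorage("/tmp/data.nt.gz", "my-bucket");

    // start an asynchronous BigQuery query and obtain its job id
    String jobId = service.startBigDataQuery("SELECT subject FROM swetodlp.test LIMIT 10");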

Source

/**
 * The contents of this file may be used under the terms of the Apache License, Version 2.0
 * in which case, the provisions of the Apache License Version 2.0 are applicable instead of those above.
 *
 * Copyright 2014, Ecarf.io
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.cloudex.cloud.impl.google;

import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.ATTRIBUTES;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.ATTRIBUTES_PATH;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.CLOUD_STORAGE_PREFIX;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.CREATE_IF_NEEDED;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.CREATE_NEVER;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.DATASTORE_SCOPE;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.DEFAULT;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.DISK_TYPES;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.DONE;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.EMAIL;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.EXT_NAT;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.HOSTNAME;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.INSTANCE_ALL_PATH;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.MACHINE_TYPES;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.MIGRATE;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.NETWORK;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.NOT_FOUND;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.ONE_TO_ONE_NAT;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.PERSISTENT;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.PROJECT_ID_PATH;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.RESOURCE_BASE_URL;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.SCOPES;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.SERVICE_ACCOUNTS;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.WAIT_FOR_CHANGE;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.WILDCARD_SUFFIX;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.WRITE_APPEND;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.ZONE;
import static io.cloudex.cloud.impl.google.compute.GoogleMetaData.ZONES;
import static java.net.HttpURLConnection.HTTP_CONFLICT;
import io.cloudex.cloud.impl.google.bigquery.BigQueryStreamable;
import io.cloudex.cloud.impl.google.compute.GoogleMetaData;
import io.cloudex.cloud.impl.google.compute.InstanceStatus;
import io.cloudex.cloud.impl.google.storage.DownloadProgressListener;
import io.cloudex.cloud.impl.google.storage.UploadProgressListener;
import io.cloudex.framework.cloud.api.ApiUtils;
import io.cloudex.framework.cloud.api.AuthenticationProvider;
import io.cloudex.framework.cloud.api.Callback;
import io.cloudex.framework.cloud.api.CloudService;
import io.cloudex.framework.cloud.api.FutureTask;
import io.cloudex.framework.cloud.entities.BigDataColumn;
import io.cloudex.framework.cloud.entities.BigDataTable;
import io.cloudex.framework.cloud.entities.QueryStats;
import io.cloudex.framework.cloud.entities.VmMetaData;
import io.cloudex.framework.config.VmConfig;
import io.cloudex.framework.utils.Constants;
import io.cloudex.framework.utils.FileUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.math.BigInteger;
import java.nio.file.Files;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;

import org.apache.commons.compress.compressors.gzip.GzipUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.http.FileContent;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.http.InputStreamContent;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.Data;
import com.google.api.client.util.ExponentialBackOff;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.ExplainQueryStage;
import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationExtract;
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.JobStatistics2;
import com.google.api.services.bigquery.model.JobStatistics3;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableDataInsertAllRequest;
import com.google.api.services.bigquery.model.TableDataInsertAllResponse;
import com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.compute.Compute;
import com.google.api.services.compute.Compute.Instances.Insert;
import com.google.api.services.compute.model.AccessConfig;
import com.google.api.services.compute.model.AttachedDisk;
import com.google.api.services.compute.model.AttachedDiskInitializeParams;
import com.google.api.services.compute.model.Instance;
import com.google.api.services.compute.model.Metadata;
import com.google.api.services.compute.model.Metadata.Items;
import com.google.api.services.compute.model.NetworkInterface;
import com.google.api.services.compute.model.Operation;
import com.google.api.services.compute.model.Operation.Error.Errors;
import com.google.api.services.compute.model.Scheduling;
import com.google.api.services.compute.model.ServiceAccount;
import com.google.api.services.storage.Storage;
import com.google.api.services.storage.model.Bucket;
import com.google.api.services.storage.model.Objects;
import com.google.api.services.storage.model.StorageObject;
import com.google.common.base.Joiner;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;

/**
 * A Google Cloud Platform specific implementation of cloudex {@link CloudService}
 *
 * TODO add multi-threading option to upload and download files from cloud storage
 *
 * @author Omer Dawelbeit (omerio)
 *
 */
public class GoogleCloudServiceImpl implements GoogleCloudService {

    private final static Log log = LogFactory.getLog(GoogleCloudServiceImpl.class);

    /** Global instance of the JSON factory. */
    private static final JsonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();

    // the maximum size in bytes for metadata
    private int maximumMetaDataSize = 32_768;

    // the delay in seconds between API checks
    private int apiRecheckDelay = 2;

    private int maxBigQueryRequestSize = 100_000;

    private int bigQueryStreamDelay = 2;

    private int bigQueryStreamFailRetries = 3;

    /** Global instance of the HTTP transport. */
    private HttpTransport httpTransport;

    private Storage storage;

    private Compute compute;

    private Bigquery bigquery;

    private String projectId;

    private String zone;

    private String instanceId;

    private String serviceAccount;

    private List<String> scopes;

    private boolean remote;

    @SuppressWarnings("rawtypes")
    private AuthenticationProvider authenticationProvider;

    /**
     * Perform initialization before
     * this cloud service is used
     * @throws IOException
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    public VmMetaData init() throws IOException {

        // can't do anything without an authentication provider
        Validate.notNull(this.authenticationProvider);

        Map<String, Object> attributes = null;
        String fingerprint = null;

        this.getHttpTransport();

        if (this.remote) {
            // if not running on a vm on the cloud environment, then
            // these values need to be set externally
            Validate.notNull(this.zone);
            Validate.notNull(this.projectId);
            Validate.notNull(this.instanceId);
            Validate.notNull(this.scopes);

            Credential credential = (Credential) this.authenticationProvider.authorize();

            this.getCompute(credential);
            this.getStorage(credential);
            this.getBigquery(credential);

        } else {
            this.projectId = GoogleMetaData.getMetaData(PROJECT_ID_PATH);
            Map<String, Object> metaData = GoogleMetaData.getMetaDataAsMap(INSTANCE_ALL_PATH);
            attributes = (Map<String, Object>) metaData.get(ATTRIBUTES);

            // strangely zone looks like this: "projects/315344313954/zones/us-central1-a"
            this.zone = (String) metaData.get(ZONE);
            this.zone = StringUtils.substringAfterLast(this.zone, "/");

            // the name isn't returned, but the hostname looks like this:
            // "ecarf-evm-1.c.ecarf-1000.internal"
            this.instanceId = (String) metaData.get(HOSTNAME);
            this.instanceId = StringUtils.substringBefore(this.instanceId, ".");

            // get the default service account
            Map<String, Object> serviceAccountConfig = ((Map) ((Map) metaData.get(SERVICE_ACCOUNTS)).get(DEFAULT));
            this.serviceAccount = (String) serviceAccountConfig.get(EMAIL);
            this.scopes = (List) serviceAccountConfig.get(SCOPES);
            // add the datastore scope as well
            this.scopes.add(DATASTORE_SCOPE);

            // no need for this call right now
            //this.authorise();
            this.getCompute();
            this.getStorage();
            this.getBigquery();

            boolean retrying = true;
            int retries = 0;
            Instance instance = null;

            do {
                try {
                    // java.net.SocketTimeoutException: connect timed out
                    instance = this.getInstance(instanceId, zone);
                    retrying = false;

                } catch (IOException e) {
                    log.error("Failed to retrieve instance details, retries: " + retries, e);
                    retries++;
                    if (retries > 3) {
                        throw e;
                    }
                    ApiUtils.block(this.getApiRecheckDelay());
                }

            } while (retrying);

            fingerprint = instance.getMetadata().getFingerprint();
        }

        log.debug("Successfully initialized Google Cloud Service: " + this);
        return new VmMetaData(attributes, fingerprint);

    }
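
    /*
     * For context: the GoogleMetaData.getMetaData(...) calls used above are not shown in this file;
     * they read from the GCE metadata server. A minimal, hypothetical sketch of such a read, assuming
     * the standard metadata endpoint and the mandatory "Metadata-Flavor: Google" header, would be:
     *
     *   URL url = new URL("http://metadata.google.internal/computeMetadata/v1/project/project-id");
     *   HttpURLConnection conn = (HttpURLConnection) url.openConnection();
     *   conn.setRequestProperty("Metadata-Flavor", "Google");
     *   try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
     *       String projectId = reader.readLine();
     *   }
     */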

    /**
     * Create a new instance of the HTTP transport
     * @return
     * @throws IOException
     */
    protected HttpTransport getHttpTransport() throws IOException {
        if (httpTransport == null) {
            try {
                httpTransport = GoogleNetHttpTransport.newTrustedTransport();
            } catch (GeneralSecurityException e) {
                log.error("failed to create transport", e);
                throw new IOException(e);
            }
        }

        return httpTransport;
    }

    /**
     * Get the OAuth token, requesting a new one if the current token has expired.
     * Note: this is only relevant if remote = false
     * @return
     * @throws IOException
     */
    protected String getOAuthToken() throws IOException {

        String token = null;

        if (!remote) {
            token = (String) this.authenticationProvider.authorize();
        }

        return token;
    }

    /**
     * Create a compute API client instance
     * @return
     * @throws IOException
     */
    protected Compute getCompute() throws IOException {
        return this.getCompute(null);
    }

    /**
     * Create a compute API client instance
     * @return
     * @throws IOException
     */
    protected Compute getCompute(Credential credential) throws IOException {
        if (this.compute == null) {
            this.compute = new Compute.Builder(getHttpTransport(), JSON_FACTORY, credential)
                    .setApplicationName(Constants.APP_NAME).build();
        }
        return this.compute;
    }

    /**
     * Create a bigquery API client instance
     * @return
     * @throws IOException
     */
    protected Bigquery getBigquery() throws IOException {
        return this.getBigquery(null);
    }

    /**
     * Create a bigquery API client instance
     * @return
     * @throws IOException
     */
    protected Bigquery getBigquery(Credential credential) throws IOException {
        if (this.bigquery == null) {
            this.bigquery = new Bigquery.Builder(getHttpTransport(), JSON_FACTORY, credential)
                    .setApplicationName(Constants.APP_NAME).build();
        }
        return this.bigquery;
    }

    /**
     * Create a storage API client instance
     * @return
     * @throws IOException
     */
    protected Storage getStorage() throws IOException {
        return this.getStorage(null);
    }

    /**
     * Create a storage API client instance
     * @return
     * @throws IOException
     */
    protected Storage getStorage(Credential credential) throws IOException {
        if (this.storage == null) {
            this.storage = new Storage.Builder(getHttpTransport(), JSON_FACTORY, credential)
                    .setApplicationName(Constants.APP_NAME).build();
        }
        return this.storage;
    }

    //------------------------------------------------- Storage -------------------------------
    /**
     * Create a bucket on the mass cloud storage
     * @param bucket
     * @throws IOException
     */
    @Override
    public void createCloudStorageBucket(String bucket, String location) throws IOException {

        Storage.Buckets.Insert insertBucket = this.getStorage().buckets()
                .insert(this.projectId, new Bucket().setName(bucket).setLocation(location)
                // .setDefaultObjectAcl(ImmutableList.of(
                // new ObjectAccessControl().setEntity("allAuthenticatedUsers").setRole("READER")))
                ).setOauthToken(this.getOAuthToken());
        try {
            @SuppressWarnings("unused")
            Bucket createdBucket = insertBucket.execute();
        } catch (GoogleJsonResponseException e) {
            GoogleJsonError error = e.getDetails();
            if (error.getCode() == HTTP_CONFLICT && error.getMessage().contains("You already own this bucket.")) {
                log.debug(bucket + " already exists!");
            } else {
                throw e;
            }
        }
    }

    /**
     * Upload the provided file into cloud storage
     * This is what Google returns:
     * <pre>
     * CONFIG: {
     "kind": "storage#object",
     "id": "ecarf/umbel_links.nt_out.gz/1397227987451000",
     "selfLink": "https://www.googleapis.com/storage/v1beta2/b/ecarf/o/umbel_links.nt_out.gz",
     "name": "umbel_links.nt_out.gz",
     "bucket": "ecarf",
     "generation": "1397227987451000",
     "metageneration": "1",
     "contentType": "application/x-gzip",
     "updated": "2014-04-11T14:53:07.339Z",
     "storageClass": "STANDARD",
     "size": "8474390",
     "md5Hash": "UPhXcZZGbD9198OhQcdnvQ==",
     "owner": {
      "entity": "user-00b4903a97e56638621f0643dc282444442a11b19141d3c7b425c4d17895dcf6",
      "entityId": "00b4903a97e56638621f0643dc282444442a11b19141d3c7b425c4d17895dcf6"
     },
     "crc32c": "3twYkA==",
     "etag": "CPj48u7X2L0CEAE="
    }
      </pre>
     * @param filename
     * @param bucket
     * @throws IOException
     */
    @Override
    public io.cloudex.framework.cloud.entities.StorageObject uploadFileToCloudStorage(String filename,
            String bucket, Callback callback) throws IOException {

        FileInputStream fileStream = new FileInputStream(filename);
        String contentType;
        boolean gzipDisabled;

        if (GzipUtils.isCompressedFilename(filename)) {
            contentType = Constants.GZIP_CONTENT_TYPE;
            gzipDisabled = true;

        } else {
            contentType = Files.probeContentType((new File(filename)).toPath());
            if (contentType == null) {
                contentType = Constants.BINARY_CONTENT_TYPE;
            }
            gzipDisabled = false;
        }

        InputStreamContent mediaContent = new InputStreamContent(contentType, fileStream);

        // Not strictly necessary, but allows optimization in the cloud.
        mediaContent.setLength(fileStream.available());

        Storage.Objects.Insert insertObject = getStorage().objects()
                .insert(bucket,
                        new StorageObject().setName(
                                StringUtils.substringAfterLast(filename, FileUtils.PATH_SEPARATOR)),
                        mediaContent)
                .setOauthToken(this.getOAuthToken());

        insertObject.getMediaHttpUploader().setProgressListener(new UploadProgressListener(callback))
                .setDisableGZipContent(gzipDisabled);
        // For small files, you may wish to call setDirectUploadEnabled(true), to
        // reduce the number of HTTP requests made to the server.
        if (mediaContent.getLength() > 0 && mediaContent.getLength() <= 4 * FileUtils.ONE_MB /* 4MB */) {
            insertObject.getMediaHttpUploader().setDirectUploadEnabled(true);
        }

        StorageObject object = null;

        try {
            object = insertObject.execute();

        } catch (IOException e) {

            log.error("Error whilst uploading file", e);

            ApiUtils.block(5);
            // try again
            object = insertObject.execute();
        }

        return this.getCloudExStorageObject(object, bucket);
    }

    /**
     * Upload a file to cloud storage and block until it's uploaded
     * @param filename
     * @param bucket
     * @throws IOException
     */
    @Override
    public io.cloudex.framework.cloud.entities.StorageObject uploadFileToCloudStorage(String filename,
            String bucket) throws IOException {

        final FutureTask task = new FutureTask();

        Callback callback = new Callback() {
            @Override
            public void execute() {
                log.debug("Upload complete");
                task.setDone(true);
            }
        };

        io.cloudex.framework.cloud.entities.StorageObject storageObject = this.uploadFileToCloudStorage(filename,
                bucket, callback);

        // wait for the upload to finish
        while (!task.isDone()) {
            ApiUtils.block(5);
        }

        return storageObject;

    }

    /**
     * Download an object from cloud storage to a file
     * @param object
     * @param outFile
     * @param bucket
     * @param callback
     * @throws IOException
     */
    @Override
    public void downloadObjectFromCloudStorage(String object, String outFile, String bucket, Callback callback)
            throws IOException {

        log.debug("Downloading cloud storage file " + object + ", to: " + outFile);

        FileOutputStream out = new FileOutputStream(outFile);

        Storage.Objects.Get getObject = getStorage().objects().get(bucket, object)
                .setOauthToken(this.getOAuthToken());

        getObject.getMediaHttpDownloader().setDirectDownloadEnabled(true)
                .setProgressListener(new DownloadProgressListener(callback));

        getObject.executeMediaAndDownloadTo(out);

    }

    /**
     *  TODO Parallel/Multi download
     * Download an object from cloud storage to a file, this method will block until the file is downloaded
     * @param object
     * @param outFile
     * @param bucket
     * @throws IOException
     *
     */
    @Override
    public void downloadObjectFromCloudStorage(String object, final String outFile, String bucket)
            throws IOException {

        //final Thread currentThread = Thread.currentThread();
        final FutureTask task = new FutureTask();

        Callback callback = new Callback() {
            @Override
            public void execute() {
                log.debug("Download complete, file saved to: " + outFile);
                //LockSupport.unpark(currentThread);
                task.setDone(true);
            }
        };

        this.downloadObjectFromCloudStorage(object, outFile, bucket, callback);

        // wait for the download to take place
        //LockSupport.park();
        while (!task.isDone()) {
            ApiUtils.block(5);
        }

    }

    @Override
    public List<io.cloudex.framework.cloud.entities.StorageObject> listCloudStorageObjects(String bucket)
            throws IOException {
        List<io.cloudex.framework.cloud.entities.StorageObject> objects = new ArrayList<>();
        Storage.Objects.List listObjects = getStorage().objects().list(bucket).setOauthToken(this.getOAuthToken());
        // we are not paging, just get everything
        Objects cloudObjects = listObjects.execute();

        for (StorageObject cloudObject : cloudObjects.getItems()) {
            // Do things!
            io.cloudex.framework.cloud.entities.StorageObject object = this.getCloudExStorageObject(cloudObject,
                    bucket);
            objects.add(object);
        }
        return objects;
    }

    /**
     * Create a cloudex storage object from a Google com.google.api.services.storage.model.StorageObject
     * @param cloudObject
     * @param bucket
     * @return
     */
    private io.cloudex.framework.cloud.entities.StorageObject getCloudExStorageObject(StorageObject cloudObject,
            String bucket) {
        io.cloudex.framework.cloud.entities.StorageObject object = new io.cloudex.framework.cloud.entities.StorageObject();
        object.setContentType(cloudObject.getContentType());
        object.setDirectLink(cloudObject.getSelfLink());
        object.setName(cloudObject.getName());
        object.setSize(cloudObject.getSize());
        object.setUri(CLOUD_STORAGE_PREFIX + bucket + "/" + cloudObject.getName());
        return object;
    }

    //------------------------------------------------- Compute -------------------------------

    /**
     *
     * @param instanceId
     * @param zoneId
     * @return
     * @throws IOException
     */
    private Instance getInstance(String instanceId, String zoneId) throws IOException {
        return this.getCompute().instances().get(this.projectId, zoneId != null ? zoneId : this.zone, instanceId)
                .setOauthToken(this.getOAuthToken()).execute();
    }

    /**
     * Get the meta data of the current instance by calling the metadata server.
     * If waitForChange is set, the call blocks until the metadata changes
     * @return
     * @throws IOException
     */
    @Override
    public VmMetaData getMetaData(boolean waitForChange) throws IOException {

        Map<String, Object> attributes = GoogleMetaData
                .getMetaDataAsMap(ATTRIBUTES_PATH + (waitForChange ? WAIT_FOR_CHANGE : ""));
        Instance instance = this.getInstance(instanceId, zone);
        String fingerprint = instance.getMetadata().getFingerprint();
        return new VmMetaData(attributes, fingerprint);

    }

    /**
     * Get the meta data for the provided instance id
     * @param instanceId
     * @param zoneId
     * @return
     * @throws IOException
     */
    @Override
    public VmMetaData getMetaData(String instanceId, String zoneId) throws IOException {
        Instance instance = this.getInstance(instanceId != null ? instanceId : this.instanceId,
                zoneId != null ? zoneId : this.zone);
        List<Items> items = instance.getMetadata().getItems();
        Map<String, Object> attributes = new HashMap<>();
        for (Items item : items) {
            attributes.put(item.getKey(), item.getValue());
        }
        String fingerprint = instance.getMetadata().getFingerprint();
        return new VmMetaData(attributes, fingerprint);
    }

    /**
     *
     * Update the meta data of the current instance
     * @param metaData
     * @throws IOException
     */
    @Override
    public void updateMetadata(VmMetaData metaData) throws IOException {
        this.updateMetadata(metaData, this.zone, this.instanceId, true);
    }

    /**
     *
     * Update the meta data of the provided instance
     * @param metaData
     * @param zoneId
     * @param instanceId
     * @param block
     * @throws IOException
     */
    @Override
    public String updateMetadata(VmMetaData metaData, String zoneId, String instanceId, boolean block)
            throws IOException {

        Metadata metadata = this.getGoogleMetaData(metaData);

        Operation operation = this.getCompute().instances().setMetadata(projectId, zoneId, instanceId, metadata)
                .setOauthToken(this.getOAuthToken()).execute();

        log.debug("Successuflly initiated operation: " + operation);

        // shall we wait until the operation is complete?
        if (block) {
            this.blockOnOperation(operation, zoneId);

            // update the fingerprint of the current metadata
            Instance instance = this.getInstance(instanceId, zoneId);
            metaData.setFingerprint(instance.getMetadata().getFingerprint());

        }

        return operation.getName();
    }

    /**
     * Block and wait the operations with the provided references to complete
     * @param references
     * @param zoneId
     * @throws IOException
     */
    @Override
    public void blockOnComputeOperations(List<String> references, String zoneId) throws IOException {

        for (String reference : references) {

            Operation operation = null;

            do {

                operation = this.getCompute().zoneOperations().get(projectId, zoneId, reference)
                        .setOauthToken(this.getOAuthToken()).execute();

                // check if the operation has actually failed
                if ((operation.getError() != null) && (operation.getError().getErrors() != null)) {
                    Errors error = operation.getError().getErrors().get(0);
                    throw new IOException("Operation failed: " + error.getCode() + " - " + error.getMessage());
                }

            } while ((operation != null) && !DONE.endsWith(operation.getStatus()));
        }
    }

    /**
     * Wait until the provided operation is done, if the operation returns an error then an IOException
     * will be thrown
     * @param operation
     * @param zoneId
     * @throws IOException
     */
    protected void blockOnOperation(Operation operation, String zoneId) throws IOException {
        do {
            // sleep for 2 seconds before checking the operation status
            ApiUtils.block(this.getApiRecheckDelay());

            operation = this.getCompute().zoneOperations().get(projectId, zoneId, operation.getName())
                    .setOauthToken(this.getOAuthToken()).execute();

            // check if the operation has actually failed
            if ((operation.getError() != null) && (operation.getError().getErrors() != null)) {
                Errors error = operation.getError().getErrors().get(0);
                throw new IOException("Operation failed: " + error.getCode() + " - " + error.getMessage());
            }

        } while (!DONE.endsWith(operation.getStatus()));
    }

    /**
     * Create VM instances, optionally block until all are created. If any fails then the returned flag is false
     *
     *  body = {
    'name': NEW_INSTANCE_NAME,
    'machineType': &lt;fully-qualified-machine-type-url&gt;,
    'networkInterfaces': [{
      'accessConfigs': [{
    'type': 'ONE_TO_ONE_NAT',
    'name': 'External NAT'
       }],
      'network': &lt;fully-qualified-network-url&gt;
    }],
    'disk': [{
       'autoDelete': 'true',
       'boot': 'true',
       'type': 'PERSISTENT',
       'initializeParams': {
      'diskName': 'my-root-disk',
      'sourceImage': '&lt;fully-qualified-image-url&gt;',
       }
     }]
    }
     * @param configs
     * @param block
     * @throws IOException
     * TODO start VMs in parallel
     */
    @Override
    public boolean startInstance(List<VmConfig> configs, boolean block) throws IOException {

        for (VmConfig config : configs) {
            log.debug("Creating VM for config: " + config);
            String zoneId = config.getZoneId();
            zoneId = zoneId != null ? zoneId : this.zone;
            Instance content = new Instance();

            // Instance config
            content.setMachineType(
                    RESOURCE_BASE_URL + this.projectId + ZONES + zoneId + MACHINE_TYPES + config.getVmType());
            content.setName(config.getInstanceId());
            //content.setZone(zoneId);
            // startup script
            if (StringUtils.isNoneBlank(config.getStartupScript())) {
                config.getMetaData().addValue(GoogleMetaData.STARTUP_SCRIPT, config.getStartupScript());
            }
            content.setMetadata(this.getGoogleMetaData(config.getMetaData()));

            // service account
            ServiceAccount sa = new ServiceAccount();
            sa.setEmail(this.serviceAccount).setScopes(this.scopes);
            content.setServiceAccounts(Lists.newArrayList(sa));

            // network
            NetworkInterface inf = new NetworkInterface();
            inf.setNetwork(RESOURCE_BASE_URL + this.projectId + NETWORK + config.getNetworkId());
            // add ability to turn off external IP addresses
            if (config.getNoExternalIp() == null || Boolean.FALSE.equals(config.getNoExternalIp())) {
                AccessConfig accessConf = new AccessConfig();
                accessConf.setType(ONE_TO_ONE_NAT).setName(EXT_NAT);
                inf.setAccessConfigs(Lists.newArrayList(accessConf));
            }

            content.setNetworkInterfaces(Lists.newArrayList(inf));

            // scheduling
            Scheduling scheduling = new Scheduling();
            scheduling.setAutomaticRestart(false);
            scheduling.setOnHostMaintenance(MIGRATE);
            content.setScheduling(scheduling);

            // Disk
            AttachedDisk disk = new AttachedDisk();

            AttachedDiskInitializeParams params = new AttachedDiskInitializeParams();
            params.setDiskName(config.getInstanceId());
            params.setSourceImage(RESOURCE_BASE_URL + config.getImageId());
            if (config.getDiskSize() != null) {
                params.setDiskSizeGb(config.getDiskSize());
            }

            disk.setAutoDelete(true).setBoot(true).setDeviceName(config.getInstanceId()).setType(PERSISTENT)
                    .setInitializeParams(params);

            if (StringUtils.isNotBlank(config.getDiskType())) {
                // standard or SSD based disks
                params.setDiskType(
                        RESOURCE_BASE_URL + this.projectId + ZONES + zoneId + DISK_TYPES + config.getDiskType());
            }

            content.setDisks(Lists.newArrayList(disk));

            Insert insert = this.getCompute().instances().insert(this.projectId, zoneId, content)
                    .setOauthToken(this.getOAuthToken());

            Operation operation = insert.execute();
            log.debug("Successuflly initiated operation: " + operation);
        }

        boolean success = true;

        // we have to block until all instances are provisioned
        if (block) {

            for (VmConfig config : configs) {
                String status = InstanceStatus.PROVISIONING.toString();

                int retries = 20;

                do {

                    // sleep for some seconds before checking the vm status
                    ApiUtils.block(this.getApiRecheckDelay());

                    String zoneId = config.getZoneId();
                    zoneId = zoneId != null ? zoneId : this.zone;
                    // check the instance status
                    Instance instance = null;

                    // seems the Api sometimes return a not found exception
                    try {
                        instance = this.getInstance(config.getInstanceId(), zoneId);
                        status = instance.getStatus();
                        log.debug(config.getInstanceId() + ", current status is: " + status);

                    } catch (GoogleJsonResponseException e) {
                        if (e.getMessage().indexOf(NOT_FOUND) == 0) {
                            log.warn("Instance not found: " + config.getInstanceId());
                            if (retries <= 0) {
                                throw e;
                            }
                            retries--;
                            ApiUtils.block(5);

                        } else {
                            throw e;
                        }
                    }
                    // FIXME INFO: ecarf-evm-1422261030407, current status is: null
                } while (InstanceStatus.IN_PROGRESS.contains(status));

                if (InstanceStatus.TERMINATED.equals(status)) {
                    success = false;
                }
            }
        }

        return success;
    }

    /**
     * Delete the VMs provided in this config
     * @param configs
     * @throws IOException
     *
     */
    @Override
    public void shutdownInstance(List<VmConfig> configs) throws IOException {
        for (VmConfig config : configs) {
            log.debug("Deleting VM: " + config);
            String zoneId = config.getZoneId();
            zoneId = zoneId != null ? zoneId : this.zone;
            this.getCompute().instances().delete(this.projectId, zoneId, config.getInstanceId())
                    .setOauthToken(this.getOAuthToken()).execute();
        }
    }

    /**
     * Delete the currently running vm, i.e. self terminate
     * @throws IOException
     */
    @Override
    public void shutdownInstance() throws IOException {

        log.debug("Deleting VM: " + this.instanceId);
        this.getCompute().instances().delete(this.projectId, this.zone, this.instanceId)
                .setOauthToken(this.getOAuthToken()).execute();

    }

    /**
     * Create an API Metadata
     * @param vmMetaData
     * @return
     */
    protected Metadata getGoogleMetaData(VmMetaData vmMetaData) {
        Metadata metadata = new Metadata();

        Items item;
        List<Items> items = new ArrayList<>();
        for (Entry<String, Object> entry : vmMetaData.getAttributes().entrySet()) {
            item = new Items();
            item.setKey(entry.getKey()).setValue((String) (entry.getValue()));
            items.add(item);
        }
        metadata.setItems(items);
        metadata.setFingerprint(vmMetaData.getFingerprint());

        return metadata;
    }

    //------------------------------------------------- Bigquery -------------------------------

    /**
     * CONFIG: {
       "kind": "bigquery#job",
       "etag": "\"QPJfVWBscaHhAhSLq0k5xRS6X5c/xAdd09GSpMDr9PxAk-WGEBWxlKA\"",
       "id": "ecarf-1000:job_uUL5E0xmOjKxf3hREEZvb5B_M78",
       "selfLink": "https://www.googleapis.com/bigquery/v2/projects/ecarf-1000/jobs/job_uUL5E0xmOjKxf3hREEZvb5B_M78",
       "jobReference": {
    "projectId": "ecarf-1000",
    "jobId": "job_uUL5E0xmOjKxf3hREEZvb5B_M78"
       },
       "configuration": {
    "load": {
     "sourceUris": [
      "gs://ecarf/umbel_links.nt_out.gz",
      "gs://ecarf/yago_links.nt_out.gz"
     ],
     "schema": {
      "fields": [
       {
        "name": "subject",
        "type": "STRING"
       },
       {
        "name": "object",
        "type": "STRING"
       },
       {
        "name": "predicate",
        "type": "STRING"
       }
      ]
     },
     "destinationTable": {
      "projectId": "ecarf-1000",
      "datasetId": "swetodlp",
      "tableId": "test"
     }
    }
       },
       "status": {
    "state": "DONE"
       },
       "statistics": {
    "creationTime": "1398091486326",
    "startTime": "1398091498083",
    "endTime": "1398091576483",
    "load": {
     "inputFiles": "2",
     "inputFileBytes": "41510712",
     "outputRows": "3782729",
     "outputBytes": "554874551"
    }
       }
      }
     * @param files - The source URIs must be fully-qualified, in the format gs://&lt;bucket&gt;/&lt;object&gt;.
     * @param table
     * @param createTable
     * @return
     * @throws IOException
     */
    @Override
    public String loadCloudStorageFilesIntoBigData(List<String> files, BigDataTable table, boolean createTable)
            throws IOException {
        log.debug("Loading data from files: " + files + ", into big data table: " + table);

        Validate.notNull(table.getName());

        Job job = new Job();
        JobConfiguration config = new JobConfiguration();
        JobConfigurationLoad load = new JobConfigurationLoad();
        config.setLoad(load);
        job.setConfiguration(config);

        load.setSourceUris(files);
        load.setCreateDisposition(createTable ? CREATE_IF_NEEDED : CREATE_NEVER);
        load.setWriteDisposition(WRITE_APPEND);

        Validate.notNull(table.getColumns());
        load.setSchema(this.getBigQueryTableSchema(table));

        String[] names = StringUtils.split(table.getName(), '.');
        TableReference tableRef = (new TableReference()).setProjectId(this.projectId).setDatasetId(names[0])
                .setTableId(names[1]);
        load.setDestinationTable(tableRef);

        Bigquery.Jobs.Insert insert = this.getBigquery().jobs().insert(projectId, job);

        insert.setProjectId(projectId);
        insert.setOauthToken(this.getOAuthToken());

        JobReference jobRef = insert.execute().getJobReference();

        log.debug("Job ID of Load Job is: " + jobRef.getJobId());

        // TODO add retry support
        return this.checkBigQueryJobResults(jobRef.getJobId(), false, true);

    }

    /**
     * Create a Google BigQuery table schema
     * @param table
     * @return
     */
    protected TableSchema getBigQueryTableSchema(BigDataTable table) {

        TableSchema schema = new TableSchema();

        List<TableFieldSchema> fields = new ArrayList<>();
        TableFieldSchema field;
        List<BigDataColumn> columns = table.getColumns();

        for (BigDataColumn column : columns) {
            field = new TableFieldSchema();
            field.setName(column.getName());
            field.setType(column.getType());
            if (column.isRequired()) {
                field.setMode(GoogleMetaData.REQUIRED);
            }
            fields.add(field);
        }
        schema.setFields(fields);

        return schema;

    }

    /**
     * CONFIG: {
    "kind": "bigquery#job",
    "etag": "\"QPJfVWBscaHhAhSLq0k5xRS6X5c/RenEm3VqmGyNz-qo48hIw9I6GYQ\"",
    "id": "ecarf-1000:job_gJed2_eIOXJaMi8RXKjps0hgFhY",
    "selfLink": "https://www.googleapis.com/bigquery/v2/projects/ecarf-1000/jobs/job_gJed2_eIOXJaMi8RXKjps0hgFhY",
    "jobReference": {
     "projectId": "ecarf-1000",
     "jobId": "job_gJed2_eIOXJaMi8RXKjps0hgFhY"
    },
    "configuration": {
     "load": {
      "schema": {
       "fields": [
    {
     "name": "subject",
     "type": "STRING"
    },
    {
     "name": "object",
     "type": "STRING"
    },
    {
     "name": "predicate",
     "type": "STRING"
    }
       ]
      },
      "destinationTable": {
       "projectId": "ecarf-1000",
       "datasetId": "swetodlp",
       "tableId": "test"
      },
      "createDisposition": "CREATE_NEVER",
      "encoding": "UTF-8"
     }
    },
    "status": {
     "state": "RUNNING"
    },
    "statistics": {
     "creationTime": "1398092776236",
     "startTime": "1398092822962",
     "load": {
      "inputFiles": "1",
      "inputFileBytes": "8474390"
     }
    }
    }
     * @param files
     * @param table
     * @param createTable
     * @return
     * @throws IOException
     */
    @Override
    public List<String> loadLocalFilesIntoBigData(List<String> files, BigDataTable table, boolean createTable)
            throws IOException {
        /*TableSchema schema = new TableSchema();
        schema.setFields(new ArrayList<TableFieldSchema>());
        JacksonFactory JACKSON = new JacksonFactory();
        JACKSON.createJsonParser(new FileInputStream("schema.json"))
        .parseArrayAndClose(schema.getFields(), TableFieldSchema.class, null);
        schema.setFactory(JACKSON);*/

        Stopwatch stopwatch = Stopwatch.createStarted();

        String[] names = getTableAndDatasetNames(table.getName());

        TableReference tableRef = (new TableReference()).setProjectId(this.projectId).setDatasetId(names[0])
                .setTableId(names[1]);

        Job job = new Job();
        JobConfiguration config = new JobConfiguration();
        JobConfigurationLoad load = new JobConfigurationLoad();

        Validate.notNull(table.getColumns());
        load.setSchema(this.getBigQueryTableSchema(table));
        load.setDestinationTable(tableRef);

        load.setEncoding(Constants.UTF8);
        load.setCreateDisposition(createTable ? CREATE_IF_NEEDED : CREATE_NEVER);
        load.setWriteDisposition(WRITE_APPEND);

        config.setLoad(load);
        job.setConfiguration(config);

        List<String> jobIds = new ArrayList<>();

        for (String file : files) {
            FileContent content = new FileContent(Constants.BINARY_CONTENT_TYPE, new File(file));

            Bigquery.Jobs.Insert insert = this.getBigquery().jobs().insert(projectId, job, content);

            insert.setProjectId(projectId);
            insert.setOauthToken(this.getOAuthToken());

            JobReference jobRef = insert.execute().getJobReference();
            jobIds.add(jobRef.getJobId());

        }
        List<String> completedIds = new ArrayList<>();

        for (String jobId : jobIds) {
            // TODO add retry support
            completedIds.add(this.checkBigQueryJobResults(jobId, false, false));
        }

        log.debug("Uploaded " + files.size() + " files into bigquery in " + stopwatch);

        return completedIds;

    }

    /**
     * Return the name of the table and its dataset, i.e.
     * mydataset.mytable returns [mydataset, mytable]
     * @param table
     * @return
     */
    private String[] getTableAndDatasetNames(String table) {
        Validate.notNull(table);

        return StringUtils.split(table, '.');
    }

    /**
     * Stream triple data into big query
     * @param objects
     * @param table
     * @throws IOException
     */
    @Override
    public void streamObjectsIntoBigData(Collection<? extends BigQueryStreamable> objects, BigDataTable table)
            throws IOException {

        Stopwatch stopwatch = Stopwatch.createStarted();

        String[] names = getTableAndDatasetNames(table.getName());

        String datasetId = names[0];
        String tableId = names[1];
        //String timestamp = Long.toString((new Date()).getTime());

        List<TableDataInsertAllRequest.Rows> rowList = new ArrayList<>();

        //TableRow row;
        TableDataInsertAllRequest.Rows rows;

        for (BigQueryStreamable object : objects) {
            //row = new TableRow();
            //row.set
            rows = new TableDataInsertAllRequest.Rows();
            //rows.setInsertId(timestamp);
            rows.setJson(object.toMap());
            rowList.add(rows);
        }

        if (rowList.size() > maxBigQueryRequestSize) {

            int itr = (int) Math.ceil(rowList.size() * 1.0 / maxBigQueryRequestSize);

            for (int i = 1; i <= itr; i++) {

                int index;

                if (i == itr) {
                    // last iteration
                    index = rowList.size();

                } else {
                    index = maxBigQueryRequestSize;
                }

                List<TableDataInsertAllRequest.Rows> requestRows = rowList.subList(0, index);
                this.streamRowsIntoBigQuery(datasetId, tableId, requestRows, 0);
                requestRows.clear();

                //block for a short moment to avoid rateLimitExceeded errors
                ApiUtils.block(bigQueryStreamDelay);

            }

        } else {
            this.streamRowsIntoBigQuery(datasetId, tableId, rowList, 0);
        }

        stopwatch.stop();

        log.debug("Streamed " + objects.size() + " triples into bigquery in " + stopwatch);

    }
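
    /*
     * Worked example of the batching above: with maxBigQueryRequestSize = 100,000 and 250,000 rows,
     * itr = ceil(250,000 / 100,000) = 3. Each pass streams the first min(100,000, remaining) rows of
     * the (shrinking) rowList and then drops them via requestRows.clear(), so the three requests carry
     * 100,000, 100,000 and 50,000 rows respectively, with a short pause between requests.
     */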

    /**
     * Stream a list of rows into bigquery. Retries 3 times if the insert of some rows has failed, i.e. Bigquery returns
     * an insert error
     *
     *  {
    "insertErrors" : [ {
      "errors" : [ {
        "reason" : "timeout"
      } ],
      "index" : 8
    }],
      "kind" : "bigquery#tableDataInsertAllResponse"
    }
     *
     * @param datasetId
     * @param tableId
     * @param rowList
     * @throws IOException
     */
    protected void streamRowsIntoBigQuery(String datasetId, String tableId,
            List<TableDataInsertAllRequest.Rows> rowList, int retries) throws IOException {

        /*
         * ExponentialBackOff backoff = ExponentialBackOff.builder()
        .setInitialIntervalMillis(500)
        .setMaxElapsedTimeMillis(900000)
        .setMaxIntervalMillis(6000)
        .setMultiplier(1.5)
        .setRandomizationFactor(0.5)
        .build();
         */

        TableDataInsertAllRequest content = new TableDataInsertAllRequest().setRows(rowList);

        boolean retrying = false;
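        // a no-arg ExponentialBackOff uses the client library defaults (roughly a 500ms initial
        // interval, a 1.5 multiplier, 0.5 randomization factor and a 15 minute cap on elapsed time)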
        ExponentialBackOff backOff = new ExponentialBackOff();

        TableDataInsertAllResponse response = null;
        // keep trying and exponentially backoff as needed
        do {
            try {

                response = this.getBigquery().tabledata().insertAll(this.projectId, datasetId, tableId, content)
                        .setOauthToken(this.getOAuthToken()).execute();

                log.debug(response.toPrettyString());
                retrying = false;

            } catch (GoogleJsonResponseException e) {

                GoogleJsonError error = e.getDetails();

                // check for rate limit errors
                if ((error != null) && (error.getErrors() != null) && !error.getErrors().isEmpty()
                        && GoogleMetaData.RATE_LIMIT_EXCEEDED.equals(error.getErrors().get(0).getReason())) {

                    log.error("Failed to stream data, error: " + error.getMessage());

                    long backOffTime = backOff.nextBackOffMillis();

                    if (backOffTime == BackOff.STOP) {
                        // we are not retrying anymore
                        log.warn("Failed after " + backOff.getElapsedTimeMillis() / 1000
                                + " seconds of elapsed time");
                        throw e;

                    } else {
                        int period = (int) Math.ceil(backOffTime / 1000);
                        if (period == 0) {
                            period = 1;
                        }
                        log.debug("Backing off for " + period + " seconds.");
                        ApiUtils.block(period);
                        retrying = true;
                    }

                } else {
                    log.error("Failed to stream data", e);
                    throw e;
                }
            }

        } while (retrying);

        // check for failed rows
        if ((response != null) && (response.getInsertErrors() != null) && !response.getInsertErrors().isEmpty()) {
            List<TableDataInsertAllRequest.Rows> failedList = new ArrayList<>();

            List<InsertErrors> insertErrors = response.getInsertErrors();

            for (InsertErrors error : insertErrors) {
                failedList.add(rowList.get(error.getIndex().intValue()));
            }

            // retry again for the failed list
            if (retries > bigQueryStreamFailRetries) {

                log.warn("Failed to stream some rows into bigquery after 3 retries");
                throw new IOException("Failed to stream some rows into bigquery after 3 retries");

            } else {
                retries++;
                log.warn(failedList.size() + " rows failed to be inserted retrying again. Retries = " + retries);
                this.streamRowsIntoBigQuery(datasetId, tableId, failedList, retries);
            }
        }
    }

    @Override
    public String startBigDataQuery(String querySql) throws IOException {

        return this.startBigDataQuery(querySql, null);

    }

    /**
     * Creates an asynchronous Query Job for a particular query on a dataset
     *
     * @param querySql  the actual query string
     * @param table the table details
     * @return a reference to the inserted query job
     * @throws IOException
     */
    @Override
    public String startBigDataQuery(String querySql, BigDataTable table) throws IOException {

        log.debug("Inserting Query Job: " + querySql);

        Job job = new Job();
        JobConfiguration config = new JobConfiguration();
        JobConfigurationQuery queryConfig = new JobConfigurationQuery();
        config.setQuery(queryConfig);

        job.setConfiguration(config);
        queryConfig.setQuery(querySql);

        // if a table is provided then set the large results to true and supply
        // a temp table
        if (table != null) {
            String[] names = this.getTableAndDatasetNames(table.getName());
            String insId = StringUtils.replace(instanceId, "-", "_");
            String tempTable = names[1] + '_' + insId + '_' + System.currentTimeMillis();
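            // e.g. for table "swetodlp.test" on instance "ecarf-evm-1" this produces a temporary
            // destination table named something like "test_ecarf_evm_1_1398091486326"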

            TableReference tableRef = (new TableReference()).setProjectId(this.projectId).setDatasetId(names[0])
                    .setTableId(tempTable);

            queryConfig.setAllowLargeResults(true);
            queryConfig.setDestinationTable(tableRef);
        }

        com.google.api.services.bigquery.Bigquery.Jobs.Insert insert = this.getBigquery().jobs().insert(projectId,
                job);

        insert.setProjectId(projectId);
        insert.setOauthToken(this.getOAuthToken());
        // TODO java.net.SocketTimeoutException: Read timed out
        JobReference jobRef = insert.execute().getJobReference();

        log.debug("Job ID of Query Job is: " + jobRef.getJobId());

        return jobRef.getJobId();
    }

    /**
     * Polls the status of a BigQuery job, returns Job reference if "Done"
     * This method will block until the job status is Done
     * @param jobId     a reference to an inserted query Job
     * @return a reference to the completed Job
     * @throws IOException
     */
    protected String checkBigQueryJobResults(String jobId, boolean retry, boolean throwError) throws IOException {

        Job pollJob = this.waitForBigQueryJobResults(jobId, retry, throwError);

        return pollJob.getJobReference().getJobId();
    }

    /**
     * Polls the status of a BigQuery job, returns Job reference if "Done"
     * This method will block until the job status is Done
     * @param jobId     a reference to an inserted query Job
     * @return a reference to the completed Job
     * @throws IOException
     */
    protected Job waitForBigQueryJobResults(String jobId, boolean retry, boolean throwError) throws IOException {
        // Variables to keep track of total query time
        Stopwatch stopwatch = Stopwatch.createStarted();

        String status = null;
        Job pollJob = null;
        int retries = 0;

        int interval = this.getApiRecheckDelay();

        do {

            try {
                pollJob = this.getBigquery().jobs().get(projectId, jobId).setOauthToken(this.getOAuthToken())
                        .execute();

                status = pollJob.getStatus().getState();

                log.debug("Job Status: " + status + ", elapsed time (secs): " + stopwatch);

                // Pause execution before polling the job status again, to
                // reduce unnecessary calls to the BigQuery API and lower overall
                // application bandwidth.
                if (!GoogleMetaData.DONE.equals(status)) {
                    ApiUtils.block(interval);

                    // grow the polling interval (in seconds) to match the minutes the job has been
                    // running, since some bigquery jobs, e.g. exports, can take a long time
                    int minutes = (int) stopwatch.elapsed(TimeUnit.MINUTES);
                    if (minutes > interval) {
                        interval = minutes;
                    }
                }

            } catch (IOException e) {
                log.error("Failed to get job details, retries" + retries, e);
                retries++;
                if (retries > 3) {
                    throw e;
                }
            }
            //  Error handling

        } while (!GoogleMetaData.DONE.equals(status));

        stopwatch.stop();

        //String completedJobId = pollJob.getJobReference().getJobId();

        log.debug("Job completed successfully" + pollJob.toPrettyString());
        this.printJobStats(pollJob);

        if (retry && (pollJob.getStatus().getErrorResult() != null)) {
            pollJob = this.retryFailedBigQueryJob(pollJob);
        }

        if (throwError && (pollJob.getStatus().getErrorResult() != null)) {
            ErrorProto error = pollJob.getStatus().getErrorResult();
            log.debug("Error result" + error);
            throw new IOException("message: " + error.getMessage() + ", reason: " + error.getReason());
        }

        return pollJob;
    }

    /**
     * Check a number of completed jobs
     * @param jobId
     * @return
     * @throws IOException
     */
    protected Job getCompletedBigQueryJob(String jobId, boolean prettyPrint) throws IOException {

        Job pollJob = this.getBigquery().jobs().get(projectId, jobId).setOauthToken(this.getOAuthToken()).execute();

        String status = pollJob.getStatus().getState();

        if (!GoogleMetaData.DONE.equals(status)) {
            pollJob = null;
            log.warn("Job has not completed yet, skipping. JobId: " + jobId);

        } else {
            if (prettyPrint) {
                log.debug("Job completed successfully" + pollJob.toPrettyString());
            }
            this.printJobStats(pollJob);
        }

        return pollJob;
    }

    /**
     * Retry if a bigquery job has failed due to transient errors
     * {
    "configuration" : {
      "query" : {
        "createDisposition" : "CREATE_IF_NEEDED",
        "destinationTable" : {
          "datasetId" : "_f14a24df5a43859914cb508177aa01d64466d055",
          "projectId" : "ecarf-1000",
          "tableId" : "anon3fe271d7c2fafca6fbe9e0490a1488c103a3a8fd"
        },
        "query" : "select subject,object from [ontologies.swetodblp@-221459-] where predicate=\"&lt;http://lsdis.cs.uga.edu/projects/semdis/opus#last_modified_date&gt;\";",
        "writeDisposition" : "WRITE_TRUNCATE"
      }
    },
    "etag" : "\"lJkBaCYfTrFXwh5N7-r9owDp5yw/O-f9DO_VlENTr60IoQaXlNb3dFQ\"",
    "id" : "ecarf-1000:job_QGAraWZPcZLYbm6IhmIyT7aOhmQ",
    "jobReference" : {
      "jobId" : "job_QGAraWZPcZLYbm6IhmIyT7aOhmQ",
      "projectId" : "ecarf-1000"
    },
    "kind" : "bigquery#job",
    "selfLink" : "https://www.googleapis.com/bigquery/v2/projects/ecarf-1000/jobs/job_QGAraWZPcZLYbm6IhmIyT7aOhmQ",
    "statistics" : {
      "creationTime" : "1399203726826",
      "endTime" : "1399203727264",
      "startTime" : "1399203727120"
    },
    "status" : {
      "errorResult" : {
        "message" : "Connection error. Please try again.",
        "reason" : "backendError"
      },
      "errors" : [ {
        "message" : "Connection error. Please try again.",
        "reason" : "backendError"
      } ],
      "state" : "DONE"
    }
      }
     * @param job the failed job to retry
     * @return the retried job once completed, or the original job if it cannot be retried
     * @throws IOException if the BigQuery API call fails
     */
    protected Job retryFailedBigQueryJob(Job job) throws IOException {
        String jobId = job.getJobReference().getJobId();
        log.debug("Retrying failed job: " + jobId);
        log.debug("Error result" + job.getStatus().getErrorResult());
        JobConfiguration config = job.getConfiguration();
        //String newCompletedJobId = null;
        if ((config != null) && (config.getQuery() != null)) {
            // get the query
            String query = config.getQuery().getQuery();
            // re-execute the query
            ApiUtils.block(this.getApiRecheckDelay());

            BigDataTable table = null;

            if (BooleanUtils.isTrue(config.getQuery().getAllowLargeResults())
                    && (config.getQuery().getDestinationTable() != null)) {

                TableReference tableRef = config.getQuery().getDestinationTable();
                table = new BigDataTable(
                        tableRef.getDatasetId() + '.' + StringUtils.split(tableRef.getTableId(), '_')[0]);
            }

            String newJobId = startBigDataQuery(query, table);
            ApiUtils.block(this.getApiRecheckDelay());
            job = waitForBigQueryJobResults(newJobId, false, false);
        }
        return job;
    }
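
    // Worked example (illustrative, with a hypothetical table id). For a destination table id such
    // as "results_1", StringUtils.split("results_1", '_')[0] yields "results", so the retried
    // query is directed at "<datasetId>.results"; for an anonymous table id with no underscore the
    // full id is kept unchanged.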

    /**
     * Print the statistics of a BigQuery job
     * @param job the job to log statistics for
     */
    protected void printJobStats(Job job) {
        try {
            JobStatistics stats = job.getStatistics();
            JobConfiguration config = job.getConfiguration();
            // log the query
            if (config != null) {
                log.debug("query: " + (config.getQuery() != null ? config.getQuery().getQuery() : ""));
            }
            // log the total bytes processed
            JobStatistics2 qStats = stats.getQuery();
            if (qStats != null) {
                log.debug("Total Bytes processed: " + ((double) qStats.getTotalBytesProcessed() / FileUtils.ONE_GB)
                        + " GB");
                log.debug("Cache hit: " + qStats.getCacheHit());
            }

            JobStatistics3 lStats = stats.getLoad();
            if (lStats != null) {
                log.debug("Output rows: " + lStats.getOutputRows());

            }

            long time = stats.getEndTime() - stats.getCreationTime();
            log.debug("Elapsed query time (ms): " + time);
            log.debug("Elapsed query time (s): " + TimeUnit.MILLISECONDS.toSeconds(time));

        } catch (Exception e) {
            log.warn("failed to log job stats", e);
        }
    }

    /**
     * Find the number of records written from the job statistics, for example from a query plan such as:
     * "queryPlan" : [ {
    "computeRatioAvg" : 0.8558157510920105,
    "computeRatioMax" : 1.0,
    "id" : "1",
    "name" : "Stage 1",
    "readRatioAvg" : 0.06898310223119479,
    "readRatioMax" : 0.08398906138792274,
    "recordsRead" : "14936600",
    "recordsWritten" : "8091263",
    "steps" : [ {
      "kind" : "READ",
      "substeps" : [ "object, predicate, subject", "FROM ontologies.swetodblp1", "WHERE LOGICAL_OR(LOGICAL_AND(EQUAL(predicate, 0), ...), ...)" ]
    }, {
      "kind" : "WRITE",
      "substeps" : [ "object, predicate, subject", "TO __output" ]
    } ],
    "waitRatioAvg" : 0.013799600438590943,
    "waitRatioMax" : 0.013799600438590943,
    "writeRatioAvg" : 0.3758086421588464,
    "writeRatioMax" : 0.49154118427586363
      } ],
     *
     * @param job the completed query job
     * @return the number of records written by the first query plan stage, or null if not available
     */
    protected Long getBigQueryResultRows(Job job) {
        Long rows = null;
        if (job.getStatistics() != null && job.getStatistics().getQuery() != null
                && job.getStatistics().getQuery().getQueryPlan() != null) {

            List<ExplainQueryStage> explains = job.getStatistics().getQuery().getQueryPlan();

            if (explains.size() > 0) {
                ExplainQueryStage stage = explains.get(0);
                rows = stage.getRecordsWritten();
            }

        }

        return rows;
    }
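
    // Worked example (illustrative): for the query plan shown in the Javadoc above, the first (and
    // only) stage reports "recordsWritten" : "8091263", so this method would return 8091263L.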

    /**
     * Get a page of BigQuery query results, for example for a job such as:
     * {
       "kind": "bigquery#job",
       "etag": "\"QPJfVWBscaHhAhSLq0k5xRS6X5c/eSppPGGASS7YbZBbC4v1q6lTcGM\"",
       "id": "ecarf-1000:job_CaN3ROCFJdK30hBl7GBMmnvspgc",
       "selfLink": "https://www.googleapis.com/bigquery/v2/projects/ecarf-1000/jobs/job_CaN3ROCFJdK30hBl7GBMmnvspgc",
       "jobReference": {
    "projectId": "ecarf-1000",
    "jobId": "job_CaN3ROCFJdK30hBl7GBMmnvspgc"
       },
       "configuration": {
    "query": {
     "query": "select subject from table where object = \"blah\";",
     "destinationTable": {
      "projectId": "ecarf-1000",
      "datasetId": "_f14a24df5a43859914cb508177aa01d64466d055",
      "tableId": "anonc6f8ec7bfe8bbd6bd76bbac6ad8db54482cd8209"
     },
     "createDisposition": "CREATE_IF_NEEDED",
     "writeDisposition": "WRITE_TRUNCATE"
    }
       },
       "status": {
    "state": "DONE"
       },
       "statistics": {
    "creationTime": "1398030237040",
    "startTime": "1398030237657",
    "endTime": "1398030237801",
    "totalBytesProcessed": "0",
    "query": {
     "totalBytesProcessed": "0",
     "cacheHit": true
    }
       }
      }
     * @param jobId the id of the completed query job
     * @param pageToken the page token for the next page of results, or null for the first page
     * @return the query results response
     * @throws IOException
     */
    protected GetQueryResultsResponse getQueryResults(String jobId, String pageToken) throws IOException {
        int retries = 3;
        boolean retrying = false;
        GetQueryResultsResponse queryResults = null;
        do {
            try {
                queryResults = this.getBigquery().jobs().getQueryResults(projectId, jobId).setPageToken(pageToken)
                        .setOauthToken(this.getOAuthToken()).execute();

                retrying = false;

            } catch (IOException e) {
                log.error("failed to query job", e);
                retries--;
                if (retries == 0) {
                    throw e;
                }
                int delay = this.getApiRecheckDelay();
                log.debug("Retrying again in " + delay + " seconds");
                ApiUtils.block(delay);
                retrying = true;
            }

        } while (retrying);

        return queryResults;
    }

    /**
     * A job that extracts data from a table.
     * @param cloudStorageUris Cloud storage file URIs
     * @param table Table to extract from
     * @return The job to extract data from the table
     * @throws IOException Thrown if an error occurs connecting to BigQuery
     */
    // [START extract_job]
    protected Job runBigQueryExtractJob(final List<String> cloudStorageUris, final TableReference table)
            throws IOException {

        log.debug("Saving table: " + table + ", to cloud storage files: " + cloudStorageUris);

        //https://cloud.google.com/bigquery/exporting-data-from-bigquery
        JobConfigurationExtract extract = new JobConfigurationExtract().setSourceTable(table)
                .setDestinationFormat("CSV").setCompression("GZIP").setDestinationUris(cloudStorageUris);

        return this.getBigquery().jobs()
                .insert(table.getProjectId(),
                        new Job().setConfiguration(new JobConfiguration().setExtract(extract)))
                .setOauthToken(this.getOAuthToken()).execute();
    }
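
    // Illustrative usage sketch (not part of the original class): saveBigQueryResultsToCloudStorage
    // below drives this extract job with one or more wildcard URIs, roughly as follows:
    //
    //     List<String> uris = Collections.singletonList(
    //             CLOUD_STORAGE_PREFIX + bucket + "/" + filename + WILDCARD_SUFFIX);
    //     Job extract = this.runBigQueryExtractJob(uris, table);
    //     extract = this.waitForBigQueryJobResults(extract.getJobReference().getJobId(), false, true);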

    @Override
    public QueryStats saveBigQueryResultsToFile(String jobId, String filename, String bucket, Integer minFiles,
            int directDownloadRowLimit) throws IOException {

        Job queryJob = this.waitForBigQueryJobResults(jobId, false, true);
        Long rows = this.getBigQueryResultRows(queryJob);
        QueryStats stats = null;

        if (rows == null) {
            // cached query?
            rows = 0L;
        }

        log.debug("Downloading " + rows + " rows from BigQuery for jobId: " + jobId);

        if (rows > directDownloadRowLimit) {

            stats = this.saveBigQueryResultsToCloudStorage(jobId, queryJob, bucket, filename, minFiles);

        } else {

            stats = this.saveBigQueryResultsToFile(jobId, queryJob, FileUtils.TEMP_FOLDER + filename);
        }

        return stats;
    }
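
    // Illustrative usage sketch (not part of the original class), with a hypothetical instance name,
    // bucket and thresholds: result sets above the row limit are exported via Cloud Storage, smaller
    // ones are paged directly from the BigQuery API:
    //
    //     QueryStats stats = service.saveBigQueryResultsToFile(jobId, "results.csv", "my-bucket", 2, 1000000);
    //     for (String file : stats.getOutputFiles()) {
    //         log.debug("Downloaded: " + file);
    //     }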

    @Override
    public QueryStats saveBigQueryResultsToCloudStorage(String jobId, String bucket, String filename)
            throws IOException {
        return this.saveBigQueryResultsToCloudStorage(jobId, null, bucket, filename, null);
    }

    /**
     * Export BigQuery query results to Cloud Storage, then download the exported files locally
     * @param jobId the id of the query job
     * @param queryJob the completed query job, or null to wait for the job identified by jobId
     * @param bucket the Cloud Storage bucket to export to
     * @param filename the base filename for the exported files
     * @param minFiles the minimum number of export files to request, or null for a single wildcard URI
     * @return the query statistics, including the locally downloaded files
     * @throws IOException if a BigQuery or Cloud Storage call fails
     */
    protected QueryStats saveBigQueryResultsToCloudStorage(String jobId, Job queryJob, String bucket,
            String filename, Integer minFiles) throws IOException {

        // wait for the query job to complete
        if (queryJob == null) {
            queryJob = this.waitForBigQueryJobResults(jobId, true, false);
        }

        Stopwatch stopwatch = Stopwatch.createStarted();

        QueryStats stats = new QueryStats();

        // get the temporary table details
        TableReference table = queryJob.getConfiguration().getQuery().getDestinationTable();

        if (queryJob.getStatistics() != null) {
            stats.setTotalProcessedBytes(queryJob.getStatistics().getTotalBytesProcessed());
            Long rows = this.getBigQueryResultRows(queryJob);
            if (rows != null) {
                stats.setTotalRows(BigInteger.valueOf(rows));
            }
        }

        List<String> cloudStorageUris = new ArrayList<>();

        if ((minFiles == null) || (minFiles <= 1)) {
            // use single wildcard URI
            // cloudex-processor-1456752400618_1456752468023_QueryResults_0_*
            cloudStorageUris.add(CLOUD_STORAGE_PREFIX + bucket + "/" + filename + WILDCARD_SUFFIX);

        } else {
            // Multiple wildcard URIs
            // cloudex-processor-1456752400618_1456752468023_QueryResults_0_1_*
            // cloudex-processor-1456752400618_1456752468023_QueryResults_0_2_*
            String baseUri = CLOUD_STORAGE_PREFIX + bucket + "/" + filename + "_";

            for (int i = 1; i <= minFiles; i++) {

                cloudStorageUris.add(baseUri + i + WILDCARD_SUFFIX);
            }
        }

        Job extractJob = runBigQueryExtractJob(cloudStorageUris, table);

        extractJob = this.waitForBigQueryJobResults(extractJob.getJobReference().getJobId(), false, true);

        // list the relevant export files in cloud storage i.e.
        // cloudex-processor-1456752400618_1456752468023_QueryResults_0_000000000000
        // cloudex-processor-1456752400618_1456752468023_QueryResults_0_000000000001
        // cloudex-processor-1456752400618_1456752468023_QueryResults_0_000000000002
        List<io.cloudex.framework.cloud.entities.StorageObject> objects = this.listCloudStorageObjects(bucket);

        List<String> files = new ArrayList<>();

        for (io.cloudex.framework.cloud.entities.StorageObject object : objects) {
            String name = object.getName();
            if (name.startsWith(filename)) {
                files.add(name);
            }
        }

        for (String file : files) {

            log.debug("Downloading query results file: " + file);

            String localFile = FileUtils.TEMP_FOLDER + file;

            this.downloadObjectFromCloudStorage(file, localFile, bucket);

            stats.getOutputFiles().add(localFile);
        }

        log.debug("BigQuery query data saved successfully, timer: " + stopwatch);

        return stats;
    }

    @Override
    public QueryStats saveBigQueryResultsToFile(String jobId, String filename) throws IOException {
        return this.saveBigQueryResultsToFile(jobId, null, filename);
    }

    /**
     * Polls a BigQuery job and, once it is done, saves the results to a file
     * @param jobId the id of the query job
     * @param queryJob the completed query job, or null to check the job identified by jobId
     * @param filename the file to save the results to
     * @return the query statistics, including the output file
     * @throws IOException if the BigQuery API call fails or the file cannot be written
     * FIXME rename to saveBigDataResultsToFile
     */
    public QueryStats saveBigQueryResultsToFile(String jobId, Job queryJob, String filename) throws IOException {
        // query with retry support
        String completedJob;

        if (queryJob == null) {
            completedJob = checkBigQueryJobResults(jobId, true, false);
        } else {
            completedJob = queryJob.getJobReference().getJobId();
        }

        Joiner joiner = Joiner.on(',');
        String pageToken = null;
        BigInteger totalRows = null;
        Long totalBytes = null;
        Integer numFields = null;

        Stopwatch stopwatch = Stopwatch.createStarted();

        try (PrintWriter writer = new PrintWriter(new FileOutputStream(filename))) {

            do {

                GetQueryResultsResponse queryResult = this.getQueryResults(completedJob, pageToken);

                pageToken = queryResult.getPageToken();
                log.debug("Page token: " + pageToken);

                if (totalRows == null) {
                    totalRows = queryResult.getTotalRows();
                    numFields = queryResult.getSchema().getFields().size();
                    totalBytes = queryResult.getTotalBytesProcessed();
                    log.debug("Total rows for query: " + totalRows);
                }

                List<TableRow> rows = queryResult.getRows();

                if (rows != null) {
                    log.debug("Saving " + rows.size() + ", records to file: " + filename);

                    // save as CSV and properly escape the data to avoid failures on parsing
                    // one field only
                    if (numFields == 1) {
                        for (TableRow row : rows) {
                            writer.println(StringEscapeUtils.escapeCsv((String) row.getF().get(0).getV()));
                        }

                    } else {
                        // multiple fields
                        for (TableRow row : rows) {

                            List<Object> fields = new ArrayList<>();

                            for (TableCell field : row.getF()) {

                                if (Data.isNull(field.getV())) {

                                    fields.add("");

                                } else {

                                    fields.add(StringEscapeUtils.escapeCsv((String) field.getV()));
                                }
                            }
                            writer.println(joiner.join(fields));
                        }
                    }
                }

            } while ((pageToken != null) && !BigInteger.ZERO.equals(totalRows));
        }

        log.debug("BigQuery query data saved successfully, timer: " + stopwatch);

        QueryStats stats = new QueryStats(totalRows, totalBytes);
        stats.getOutputFiles().add(filename);
        return stats;
    }
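
    // Illustrative usage sketch (not part of the original class), assuming a configured instance
    // named "service": the two-argument overload defined above delegates to this method, polls the
    // job and writes a single CSV file:
    //
    //     QueryStats stats = service.saveBigQueryResultsToFile(jobId, "/tmp/results.csv");
    //     log.debug("Output files: " + stats.getOutputFiles());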

    /* (non-Javadoc)
     * @see io.cloudex.framework.cloud.api.CloudService#getMaximumMetaDataSize()
     */
    @Override
    public int getMaximumMetaDataSize() {
        return maximumMetaDataSize;
    }

    /* (non-Javadoc)
     * @see io.cloudex.framework.cloud.api.CloudService#getApiRecheckDelay()
     */
    @Override
    public int getApiRecheckDelay() {
        return apiRecheckDelay;
    }

    /**
     * @param maximumMetaDataSize the maximumMetaDataSize to set
     */
    @Override
    public void setMaximumMetaDataSize(int maximumMetaDataSize) {
        this.maximumMetaDataSize = maximumMetaDataSize;
    }

    /**
     * @param apiRecheckDelay the apiRecheckDelay to set
     */
    @Override
    public void setApiRecheckDelay(int apiRecheckDelay) {
        this.apiRecheckDelay = apiRecheckDelay;
    }

    /* (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return new ToStringBuilder(this).append("projectId", this.projectId).append("instanceId", this.instanceId)
                .append("zone", this.zone).toString();
    }

    /**
     * @param projectId the projectId to set
     */
    public void setProjectId(String projectId) {
        this.projectId = projectId;
    }

    /**
     * @param zone the zone to set
     */
    public void setZone(String zone) {
        this.zone = zone;
    }

    /**
     * @param serviceAccount the serviceAccount to set
     */
    public void setServiceAccount(String serviceAccount) {
        this.serviceAccount = serviceAccount;
    }

    /**
     * @param scopes the scopes to set
     */
    public void setScopes(List<String> scopes) {
        this.scopes = scopes;
    }

    /**
     * @param instanceId the instanceId to set
     */
    public void setInstanceId(String instanceId) {
        this.instanceId = instanceId;
    }

    /**
     * @return the instanceId
     */
    @Override
    public String getInstanceId() {
        return instanceId;
    }

    /**
     * @return the projectId
     */
    public String getProjectId() {
        return projectId;
    }

    /**
     * @return the zone
     */
    public String getZone() {
        return zone;
    }

    /**
     * @param maxBigQueryRequestSize the maxBigQueryRequestSize to set
     */
    public void setMaxBigQueryRequestSize(int maxBigQueryRequestSize) {
        this.maxBigQueryRequestSize = maxBigQueryRequestSize;
    }

    /**
     * @param bigQueryStreamDelay the bigQueryStreamDelay to set
     */
    public void setBigQueryStreamDelay(int bigQueryStreamDelay) {
        this.bigQueryStreamDelay = bigQueryStreamDelay;
    }

    /**
     * @param bigQueryStreamFailRetries the bigQueryStreamFailRetries to set
     */
    public void setBigQueryStreamFailRetries(int bigQueryStreamFailRetries) {
        this.bigQueryStreamFailRetries = bigQueryStreamFailRetries;
    }

    @Override
    public void setRemote(boolean remote) {
        this.remote = remote;
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void setAuthenticationProvider(AuthenticationProvider provider) {
        this.authenticationProvider = provider;

    }

}