// Java tutorial
/** * Copyright 2013 Google Inc. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.cloud.hadoop.gcsio; import com.google.api.client.auth.oauth2.Credential; import com.google.api.client.googleapis.batch.json.JsonBatchCallback; import com.google.api.client.googleapis.json.GoogleJsonError; import com.google.api.client.http.ByteArrayContent; import com.google.api.client.http.HttpHeaders; import com.google.api.client.http.HttpRequestInitializer; import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; import com.google.api.client.util.BackOff; import com.google.api.client.util.Data; import com.google.api.client.util.ExponentialBackOff; import com.google.api.client.util.Sleeper; import com.google.api.services.storage.Storage; import com.google.api.services.storage.Storage.Objects.Compose; import com.google.api.services.storage.model.Bucket; import com.google.api.services.storage.model.Buckets; import com.google.api.services.storage.model.ComposeRequest; import com.google.api.services.storage.model.ComposeRequest.SourceObjects; import com.google.api.services.storage.model.Objects; import com.google.api.services.storage.model.StorageObject; import com.google.cloud.hadoop.util.ApiErrorExtractor; import com.google.cloud.hadoop.util.ClientRequestHelper; import com.google.cloud.hadoop.util.HttpTransportFactory; import 
com.google.cloud.hadoop.util.ResilientOperation; import com.google.cloud.hadoop.util.RetryDeterminer; import com.google.cloud.hadoop.util.RetryHttpInitializer; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.io.BaseEncoding; import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.channels.SeekableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.FileAlreadyExistsException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Provides read/write access to Google Cloud Storage (GCS), using Java nio channel semantics. * This is a basic implementation of the GoogleCloudStorage interface that mostly delegates through * to the appropriate API call(s) via the generated JSON API client, while adding reliability and * performance features such as setting up low-level retries, translating low-level exceptions, * and request batching. */ public class GoogleCloudStorageImpl implements GoogleCloudStorage { // Pseudo path delimiter. // // GCS does not implement full concept of file system paths but it does expose // some notion of a delimiter that can be used with Storage.Objects.List to // control which items are listed. 
public static final String PATH_DELIMITER = "/";

// Number of retries to make when waiting for a bucket to be empty.
public static final int BUCKET_EMPTY_MAX_RETRIES = 20;

// Duration of wait (in milliseconds) per retry for a bucket to be empty.
public static final int BUCKET_EMPTY_WAIT_TIME_MS = 500;

// JSON factory used for formatting GCS JSON API payloads.
private static final JsonFactory JSON_FACTORY = new JacksonFactory();

// Logger.
// NOTE(review): the logger is keyed to the GoogleCloudStorage interface class rather than
// GoogleCloudStorageImpl — confirm this is the intended log category.
private static final Logger LOG = LoggerFactory.getLogger(GoogleCloudStorage.class);

// Maximum number of times to retry deletes in the case of precondition failures.
private static final int MAXIMUM_PRECONDITION_FAILURES_IN_DELETE = 4;

// A function to encode metadata map values: null maps to the JSON null sentinel
// (Data.NULL_STRING); all other byte[] values are base64-encoded.
private static final Function<byte[], String> ENCODE_METADATA_VALUES =
    new Function<byte[], String>() {
      @Override
      public String apply(byte[] bytes) {
        if (bytes == null) {
          return Data.NULL_STRING;
        } else {
          return BaseEncoding.base64().encode(bytes);
        }
      }
    };

// Inverse of ENCODE_METADATA_VALUES: base64-decodes a metadata value; on malformed input it
// logs the failure and yields null rather than propagating the IllegalArgumentException.
private static final Function<String, byte[]> DECODE_METADATA_VALUES =
    new Function<String, byte[]>() {
      @Override
      public byte[] apply(String value) {
        try {
          return BaseEncoding.base64().decode(value);
        } catch (IllegalArgumentException iae) {
          LOG.error("Failed to parse base64 encoded attribute value {} - {}", value, iae);
          return null;
        }
      }
    };

/**
 * A factory for producing BackOff objects.
 */
public static interface BackOffFactory {
  // Default factory: a fresh ExponentialBackOff per request.
  public static final BackOffFactory DEFAULT = new BackOffFactory() {
    @Override
    public BackOff newBackOff() {
      return new ExponentialBackOff();
    }
  };

  BackOff newBackOff();
}

// GCS access instance.
private Storage gcs;

// Thread-pool used for background tasks (daemon threads so the JVM can exit cleanly).
private ExecutorService threadPool = Executors.newCachedThreadPool(
    new ThreadFactoryBuilder().setNameFormat("gcs-async-channel-pool-%d").setDaemon(true).build());

// Thread-pool for manual matching of metadata tasks.
// TODO(user): Wire out GoogleCloudStorageOptions for these.
private ExecutorService manualBatchingThreadPool = new ThreadPoolExecutor(
    10 /* base num threads */, 20 /* max num threads */, 10L /* keepalive time */,
    TimeUnit.SECONDS,
    new LinkedBlockingQueue<Runnable>(),
    new ThreadFactoryBuilder().setNameFormat("gcs-manual-batching-pool-%d").setDaemon(true).build());

// Helper delegate for turning IOExceptions from API calls into higher-level semantics.
private ApiErrorExtractor errorExtractor = new ApiErrorExtractor();

// Helper for interacting with objects involved with the API client libraries.
private ClientRequestHelper<StorageObject> clientRequestHelper = new ClientRequestHelper<>();

// Factory for BatchHelpers setting up BatchRequests; can be swapped out for testing purposes.
private BatchHelper.Factory batchFactory = new BatchHelper.Factory();

// Request initializer to use for batch and non-batch requests.
private HttpRequestInitializer httpRequestInitializer;

// Configuration values for this instance.
private final GoogleCloudStorageOptions storageOptions;

// Object to use to perform sleep operations (injectable for tests via setSleeper).
private Sleeper sleeper = Sleeper.DEFAULT;

// BackOff objects are per-request, use this to make new ones.
private BackOffFactory backOffFactory = BackOffFactory.DEFAULT;

// Determine if a given IOException is due to rate-limiting; re-derived whenever
// setErrorExtractor swaps in a new extractor.
private RetryDeterminer<IOException> rateLimitedRetryDeterminer =
    RetryDeterminer.createRateLimitedRetryDeterminer(errorExtractor);

/**
 * Constructs an instance of GoogleCloudStorageImpl.
* * @param credential OAuth2 credential that allows access to GCS * @throws IOException on IO error */ public GoogleCloudStorageImpl(GoogleCloudStorageOptions options, Credential credential) throws IOException { Preconditions.checkArgument(options != null, "options must not be null"); LOG.debug("GCS({})", options.getAppName()); options.throwIfNotValid(); this.storageOptions = options; Preconditions.checkArgument(credential != null, "credential must not be null"); this.httpRequestInitializer = new RetryHttpInitializer(credential, options.getAppName()); HttpTransport httpTransport = HttpTransportFactory.createHttpTransport(options.getTransportType(), options.getProxyAddress()); // Create GCS instance. this.gcs = new Storage.Builder(httpTransport, JSON_FACTORY, httpRequestInitializer) .setApplicationName(options.getAppName()).build(); } /** * Constructs an instance of GoogleCloudStorageImpl. * * @param gcs Preconstructed Storage to use for I/O. */ public GoogleCloudStorageImpl(GoogleCloudStorageOptions options, Storage gcs) { Preconditions.checkArgument(options != null, "options must not be null"); LOG.debug("GCS({})", options.getAppName()); options.throwIfNotValid(); this.storageOptions = options; Preconditions.checkArgument(gcs != null, "gcs must not be null"); this.gcs = gcs; if (gcs != null && gcs.getRequestFactory() != null) { this.httpRequestInitializer = gcs.getRequestFactory().getInitializer(); } } @Override public GoogleCloudStorageOptions getOptions() { return storageOptions; } @VisibleForTesting protected GoogleCloudStorageImpl() { this.storageOptions = GoogleCloudStorageOptions.newBuilder().build(); } @VisibleForTesting void setThreadPool(ExecutorService threadPool) { this.threadPool = threadPool; } @VisibleForTesting void setManualBatchingThreadPool(ExecutorService manualBatchingThreadPool) { this.manualBatchingThreadPool = manualBatchingThreadPool; } @VisibleForTesting void setErrorExtractor(ApiErrorExtractor errorExtractor) { this.errorExtractor = 
errorExtractor; this.rateLimitedRetryDeterminer = RetryDeterminer.createRateLimitedRetryDeterminer(errorExtractor); } @VisibleForTesting void setClientRequestHelper(ClientRequestHelper<StorageObject> clientRequestHelper) { this.clientRequestHelper = clientRequestHelper; } @VisibleForTesting void setBatchFactory(BatchHelper.Factory batchFactory) { this.batchFactory = batchFactory; } @VisibleForTesting GoogleCloudStorageOptions getStorageOptions() { return this.storageOptions; } @VisibleForTesting void setSleeper(Sleeper sleeper) { this.sleeper = sleeper; } @VisibleForTesting void setBackOffFactory(BackOffFactory factory) { backOffFactory = factory; } @Override public WritableByteChannel create(StorageResourceId resourceId, CreateObjectOptions options) throws IOException { LOG.debug("create({})", resourceId); Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got " + resourceId); /** * When performing mutations in GCS, even when we aren't concerned with parallel writers, * we need to protect ourselves from what appear to be out-of-order writes to the writer. These * most commonly manifest themselves as a sequence of: * 1) Perform mutation M1 on object O1, which results in an HTTP 503 error, * but can be any 5XX class error. * 2) Retry mutation M1, which yields a 200 OK * 3) Perform mutation M2 on O1, which yields a 200 OK * 4) Some time later, get O1 and see M1 and not M2, even though M2 appears to have happened * later. * * To counter this we need to perform mutations with a condition attached, always. * * To perform a mutation with a condition, we first must get the content generation of the * current object. Once we have the current generation, we will create a marker file * conditionally with an ifGenerationMatch. We will then create the final object only if the * generation matches the marker file. */ // TODO(user): Have createEmptyObject return enough information to use that instead. 
Optional<Long> overwriteGeneration = Optional.absent(); long backOffSleep = 0L; if (storageOptions.isMarkerFileCreationEnabled()) { BackOff backOff = backOffFactory.newBackOff(); do { if (backOffSleep != 0) { try { sleeper.sleep(backOffSleep); } catch (InterruptedException ie) { throw new IOException(String .format("Interrupted while sleeping for backoff in create of %s", resourceId)); } } backOffSleep = backOff.nextBackOffMillis(); Storage.Objects.Insert insertObject = prepareEmptyInsert(resourceId, options); // If resourceId.hasHasGenerationId(), we'll expect the underlying prepareEmptyInsert // to already set the setIfGenerationMatch; otherwise we must explicitly fetch the // current generationId here. if (!resourceId.hasGenerationId()) { insertObject.setIfGenerationMatch(getWriteGeneration(resourceId, options.overwriteExisting())); } try { StorageObject result = insertObject.execute(); overwriteGeneration = Optional.of(result.getGeneration()); } catch (IOException ioe) { if (errorExtractor.preconditionNotMet(ioe)) { LOG.info("Retrying marker file creation. 
Retrying according to backoff policy, %s - %s", resourceId, ioe); } else { throw ioe; } } } while (!overwriteGeneration.isPresent() && backOffSleep != BackOff.STOP); if (backOffSleep == BackOff.STOP) { throw new IOException(String .format("Retries exhausted while attempting to create marker file for %s", resourceId)); } } else { // Do not use a marker-file if (resourceId.hasGenerationId()) { overwriteGeneration = Optional.of(resourceId.getGenerationId()); } else { overwriteGeneration = Optional.of(getWriteGeneration(resourceId, options.overwriteExisting())); } } ObjectWriteConditions writeConditions = new ObjectWriteConditions(overwriteGeneration, Optional.<Long>absent()); Map<String, String> rewrittenMetadata = encodeMetadata(options.getMetadata()); GoogleCloudStorageWriteChannel channel = new GoogleCloudStorageWriteChannel(threadPool, gcs, clientRequestHelper, resourceId.getBucketName(), resourceId.getObjectName(), storageOptions.getWriteChannelOptions(), writeConditions, rewrittenMetadata, options.getContentType()); channel.initialize(); return channel; } /** * See {@link GoogleCloudStorage#create(StorageResourceId)} for details about expected behavior. 
*/ @Override public WritableByteChannel create(StorageResourceId resourceId) throws IOException { LOG.debug("create({})", resourceId); Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got " + resourceId); return create(resourceId, CreateObjectOptions.DEFAULT); } @Override public void createEmptyObject(StorageResourceId resourceId, CreateObjectOptions options) throws IOException { Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got " + resourceId); Storage.Objects.Insert insertObject = prepareEmptyInsert(resourceId, options); try { insertObject.execute(); } catch (IOException ioe) { if (canIgnoreExceptionForEmptyObject(ioe, resourceId, options)) { LOG.info("Ignoring exception; verified object already exists with desired state.", ioe); } else { throw ioe; } } } /** * See {@link GoogleCloudStorage#createEmptyObject(StorageResourceId)} for details about * expected behavior. */ @Override public void createEmptyObject(StorageResourceId resourceId) throws IOException { LOG.debug("createEmptyObject({})", resourceId); Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got " + resourceId); createEmptyObject(resourceId, CreateObjectOptions.DEFAULT); } @Override public void createEmptyObjects(List<StorageResourceId> resourceIds, final CreateObjectOptions options) throws IOException { // TODO(user): This method largely follows a pattern similar to // deleteObjects(List<StorageResourceId>); extract a generic method for both. LOG.debug("createEmptyObjects({})", resourceIds); // Validate that all the elements represent StorageObjects. for (StorageResourceId resourceId : resourceIds) { Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject names only, got: '%s'", resourceId); } // Gather exceptions to wrap in a composite exception at the end. 
final List<IOException> innerExceptions = Collections.synchronizedList(new ArrayList<IOException>()); final CountDownLatch latch = new CountDownLatch(resourceIds.size()); for (final StorageResourceId resourceId : resourceIds) { final Storage.Objects.Insert insertObject = prepareEmptyInsert(resourceId, options); manualBatchingThreadPool.execute(new Runnable() { @Override public void run() { try { insertObject.execute(); LOG.debug("Successfully inserted {}", resourceId.toString()); } catch (IOException ioe) { boolean canIgnoreException = false; try { canIgnoreException = canIgnoreExceptionForEmptyObject(ioe, resourceId, options); } catch (Throwable t) { // Make sure to catch Throwable instead of only IOException so that we can // correctly wrap other such throwables and propagate them out cleanly inside // innerExceptions; common sources of non-IOExceptions include Preconditions // checks which get enforced at varous layers in the library stack. IOException toWrap = (t instanceof IOException ? (IOException) t : new IOException(t)); innerExceptions.add(wrapException(toWrap, "Error re-fetching after rate-limit error", resourceId.getBucketName(), resourceId.getObjectName())); } if (canIgnoreException) { LOG.info("Ignoring exception; verified object already exists with desired state.", ioe); } else { innerExceptions.add(wrapException(ioe, "Error inserting", resourceId.getBucketName(), resourceId.getObjectName())); } } catch (Throwable t) { innerExceptions.add(wrapException(new IOException(t), "Error inserting", resourceId.getBucketName(), resourceId.getObjectName())); } finally { latch.countDown(); } } }); } try { latch.await(); } catch (InterruptedException ie) { throw new IOException(ie); } if (!innerExceptions.isEmpty()) { throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions); } } /** * See {@link GoogleCloudStorage#createEmptyObjects(List)} for details about * expected behavior. 
*/ @Override public void createEmptyObjects(List<StorageResourceId> resourceIds) throws IOException { createEmptyObjects(resourceIds, CreateObjectOptions.DEFAULT); } /** * See {@link GoogleCloudStorage#open(StorageResourceId)} for details about expected behavior. */ @Override public SeekableByteChannel open(StorageResourceId resourceId) throws IOException { return open(resourceId, GoogleCloudStorageReadOptions.DEFAULT); } /** * See {@link GoogleCloudStorage#open(StorageResourceId)} for details about expected behavior. */ @Override public SeekableByteChannel open(StorageResourceId resourceId, GoogleCloudStorageReadOptions readOptions) throws IOException { LOG.debug("open({}, {})", resourceId, readOptions); Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got " + resourceId); // The underlying channel doesn't initially read data, which means that we won't see a // FileNotFoundException until read is called. As a result, in order to find out if the object // exists, we'll need to do an RPC (metadata or data). A metadata check should be a less // expensive operation than a read data operation. if (readOptions.getFastFailOnNotFound()) { if (!getItemInfo(resourceId).exists()) { throw GoogleCloudStorageExceptions.getFileNotFoundException(resourceId.getBucketName(), resourceId.getObjectName()); } } return new GoogleCloudStorageReadChannel(gcs, resourceId.getBucketName(), resourceId.getObjectName(), errorExtractor, clientRequestHelper, readOptions); } /** * See {@link GoogleCloudStorage#create(String)} for details about expected * behavior. */ @Override public void create(String bucketName) throws IOException { create(bucketName, CreateBucketOptions.DEFAULT); } /** * See {@link GoogleCloudStorage#create(String, CreateBucketOptions)} for * details about expected behavior. 
*/ @Override public void create(String bucketName, CreateBucketOptions options) throws IOException { LOG.debug("create({})", bucketName); Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), "bucketName must not be null or empty"); Preconditions.checkNotNull(options, "options must not be null"); Bucket bucket = new Bucket(); bucket.setName(bucketName); bucket.setLocation(options.getLocation()); bucket.setStorageClass(options.getStorageClass()); Storage.Buckets.Insert insertBucket = gcs.buckets().insert(storageOptions.getProjectId(), bucket); // TODO(user): To match the behavior of throwing FileNotFoundException for 404, we probably // want to throw org.apache.commons.io.FileExistsException for 409 here. try { ResilientOperation.retry(ResilientOperation.getGoogleRequestCallable(insertBucket), backOffFactory.newBackOff(), rateLimitedRetryDeterminer, IOException.class, sleeper); } catch (InterruptedException e) { throw new IOException(e); // From sleep } } /** * See {@link GoogleCloudStorage#deleteBuckets(List<String>)} for details about expected behavior. */ @Override public void deleteBuckets(List<String> bucketNames) throws IOException { LOG.debug("deleteBuckets({})", bucketNames.toString()); // Validate all the inputs first. for (String bucketName : bucketNames) { Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), "bucketName must not be null or empty"); } // Gather exceptions to wrap in a composite exception at the end. 
final List<IOException> innerExceptions = new ArrayList<>(); for (final String bucketName : bucketNames) { final Storage.Buckets.Delete deleteBucket = gcs.buckets().delete(bucketName); try { ResilientOperation.retry(ResilientOperation.getGoogleRequestCallable(deleteBucket), backOffFactory.newBackOff(), rateLimitedRetryDeterminer, IOException.class, sleeper); } catch (IOException ioe) { if (errorExtractor.itemNotFound(ioe)) { LOG.debug("delete({}) : not found", bucketName); innerExceptions.add(GoogleCloudStorageExceptions.getFileNotFoundException(bucketName, null)); } else { innerExceptions.add( wrapException(new IOException(ioe.toString()), "Error deleting", bucketName, null)); } } catch (InterruptedException e) { throw new IOException(e); // From sleep } } if (innerExceptions.size() > 0) { throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions); } } /** * See {@link GoogleCloudStorage#deleteObjects(List<StorageResourceId>)} for details about * expected behavior. */ @Override public void deleteObjects(List<StorageResourceId> fullObjectNames) throws IOException { LOG.debug("deleteObjects({})", fullObjectNames.toString()); // Validate that all the elements represent StorageObjects. for (StorageResourceId fullObjectName : fullObjectNames) { Preconditions.checkArgument(fullObjectName.isStorageObject(), "Expected full StorageObject names only, got: " + fullObjectName.toString()); } // Gather exceptions to wrap in a composite exception at the end. 
final List<IOException> innerExceptions = new ArrayList<>();
BatchHelper batchHelper = batchFactory.newBatchHelper(httpRequestInitializer, gcs,
    storageOptions.getMaxRequestsPerBatch());

for (final StorageResourceId fullObjectName : fullObjectNames) {
  queueSingleObjectDelete(fullObjectName, innerExceptions, batchHelper, 1);
}

// Callbacks may enqueue follow-up requests (generation fetch -> conditional delete), so keep
// flushing until the batch helper has fully drained.
do {
  batchHelper.flush();
} while (!batchHelper.isEmpty());

if (!innerExceptions.isEmpty()) {
  throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions);
}
}

/**
 * Helper to create a callback for a particular deletion request.
 */
private JsonBatchCallback<Void> getDeletionCallback(final StorageResourceId fullObjectName,
    final List<IOException> innerExceptions, final BatchHelper batchHelper, final int attempt,
    final long generation) {
  final String bucketName = fullObjectName.getBucketName();
  final String objectName = fullObjectName.getObjectName();
  return new JsonBatchCallback<Void>() {
    @Override
    public void onSuccess(Void obj, HttpHeaders responseHeaders) {
      LOG.debug(
          "Successfully deleted {} at generation {}", fullObjectName.toString(), generation);
    }

    @Override
    public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) throws IOException {
      if (errorExtractor.itemNotFound(e)) {
        // Ignore item-not-found errors. We do not have to delete what we cannot find. This
        // error typically shows up when we make a request to delete something and the server
        // receives the request but we get a retry-able error before we get a response.
        // During a retry, we no longer find the item because the server had deleted
        // it already.
        LOG.debug("deleteObjects({}) : delete not found", fullObjectName.toString());
      } else if (errorExtractor.preconditionNotMet(e)
          && attempt <= MAXIMUM_PRECONDITION_FAILURES_IN_DELETE) {
        // Fixed: the message literal was split across physical lines (invalid Java); it is now
        // a single, well-formed SLF4J parameterized message.
        LOG.info(
            "Precondition not met while deleting {} at generation {}. Attempt {}. Retrying.",
            fullObjectName.toString(), generation, attempt);
        queueSingleObjectDelete(fullObjectName, innerExceptions, batchHelper, attempt + 1);
      } else {
        innerExceptions.add(wrapException(new IOException(e.toString()),
            String.format("Error deleting, stage 2 with generation %s", generation),
            bucketName, objectName));
      }
    }
  };
}

private void queueSingleObjectDelete(final StorageResourceId fullObjectName,
    final List<IOException> innerExceptions, final BatchHelper batchHelper, final int attempt)
    throws IOException {
  final String bucketName = fullObjectName.getBucketName();
  final String objectName = fullObjectName.getObjectName();

  if (fullObjectName.hasGenerationId()) {
    // We can go direct to the deletion request instead of first fetching generation id.
    Storage.Objects.Delete deleteObject = gcs.objects().delete(bucketName, objectName)
        .setIfGenerationMatch(fullObjectName.getGenerationId());
    batchHelper.queue(deleteObject, getDeletionCallback(fullObjectName, innerExceptions,
        batchHelper, attempt, fullObjectName.getGenerationId()));
  } else {
    // We first need to get the current object version to issue a safe delete for only the
    // latest version of the object.
    // Fetch the current generation first, then queue a conditional delete from inside the
    // callback so only that exact version is removed.
    Storage.Objects.Get getObject = gcs.objects().get(bucketName, objectName);
    batchHelper.queue(getObject, new JsonBatchCallback<StorageObject>() {
      @Override
      public void onSuccess(StorageObject storageObject, HttpHeaders httpHeaders)
          throws IOException {
        final Long generation = storageObject.getGeneration();
        // Delete only the generation we just observed (safe against concurrent overwrites).
        Storage.Objects.Delete deleteObject = gcs.objects().delete(bucketName, objectName)
            .setIfGenerationMatch(generation);
        batchHelper.queue(deleteObject, getDeletionCallback(fullObjectName, innerExceptions,
            batchHelper, attempt, generation));
      }

      @Override
      public void onFailure(GoogleJsonError googleJsonError, HttpHeaders httpHeaders)
          throws IOException {
        if (errorExtractor.itemNotFound(googleJsonError)) {
          // If the item isn't found, treat it the same as if it's not found in the delete
          // case: assume the user wanted the object gone and now it is.
          LOG.debug("deleteObjects({}) : get not found", fullObjectName.toString());
        } else {
          innerExceptions.add(wrapException(new IOException(googleJsonError.toString()),
              "Error deleting, stage 1", bucketName, objectName));
        }
      }
    });
  }
}

/**
 * Validates basic argument constraints like non-null, non-empty Strings, using {@code
 * Preconditions} in addition to checking for src/dst bucket existence and compatibility of bucket
 * properties such as location and storage-class.
 * @param gcsImpl A GoogleCloudStorage for retrieving bucket info via getItemInfo, but only if
 *     srcBucketName != dstBucketName; passed as a parameter so that this static method can be
 *     used by other implementations of GoogleCloudStorage that want to preserve the validation
 *     behavior of GoogleCloudStorageImpl, including disallowing cross-location copies.
 */
@VisibleForTesting
public static void validateCopyArguments(String srcBucketName, List<String> srcObjectNames,
    String dstBucketName, List<String> dstObjectNames, GoogleCloudStorage gcsImpl)
    throws IOException {
  Preconditions.checkArgument(!Strings.isNullOrEmpty(srcBucketName),
      "srcBucketName must not be null or empty");
  Preconditions.checkArgument(!Strings.isNullOrEmpty(dstBucketName),
      "dstBucketName must not be null or empty");
  Preconditions.checkArgument(srcObjectNames != null, "srcObjectNames must not be null");
  Preconditions.checkArgument(dstObjectNames != null, "dstObjectNames must not be null");
  Preconditions.checkArgument(srcObjectNames.size() == dstObjectNames.size(),
      "Must supply same number of elements in srcObjectNames and dstObjectNames");

  // Avoid copy across locations or storage classes; only need the bucket lookups when the
  // source and destination buckets differ.
  if (!srcBucketName.equals(dstBucketName)) {
    GoogleCloudStorageItemInfo srcBucketInfo =
        gcsImpl.getItemInfo(new StorageResourceId(srcBucketName));
    if (!srcBucketInfo.exists()) {
      throw new FileNotFoundException("Bucket not found: " + srcBucketName);
    }
    GoogleCloudStorageItemInfo dstBucketInfo =
        gcsImpl.getItemInfo(new StorageResourceId(dstBucketName));
    if (!dstBucketInfo.exists()) {
      throw new FileNotFoundException("Bucket not found: " + dstBucketName);
    }
    if (!srcBucketInfo.getLocation().equals(dstBucketInfo.getLocation())) {
      throw new UnsupportedOperationException(
          "This operation is not supported across two different storage locations.");
    }
    if (!srcBucketInfo.getStorageClass().equals(dstBucketInfo.getStorageClass())) {
      throw new UnsupportedOperationException(
          "This operation is not supported across two different storage classes.");
    }
  }
  for (int i = 0; i < srcObjectNames.size(); i++) {
    Preconditions.checkArgument(!Strings.isNullOrEmpty(srcObjectNames.get(i)),
        "srcObjectName must not be null or empty");
    Preconditions.checkArgument(!Strings.isNullOrEmpty(dstObjectNames.get(i)),
        "dstObjectName must not be null or empty");
    // A same-bucket, same-name copy would be a no-op (or worse); reject it explicitly.
    if (srcBucketName.equals(dstBucketName)
        && srcObjectNames.get(i).equals(dstObjectNames.get(i))) {
      throw new IllegalArgumentException(String.format(
          "Copy destination must be different from source for %s.",
          StorageResourceId.createReadableString(srcBucketName, srcObjectNames.get(i))));
    }
  }
}

/**
 * See {@link GoogleCloudStorage#copy(String, List, String, List)} for details
 * about expected behavior.
 */
@Override
public void copy(final String srcBucketName, List<String> srcObjectNames,
    final String dstBucketName, List<String> dstObjectNames) throws IOException {
  validateCopyArguments(srcBucketName, srcObjectNames, dstBucketName, dstObjectNames, this);

  // Gather FileNotFoundExceptions for individual objects, but only throw a single combined
  // exception at the end.
  final List<IOException> innerExceptions = new ArrayList<>();

  // Perform the copy operations.
  BatchHelper batchHelper = batchFactory.newBatchHelper(httpRequestInitializer, gcs,
      storageOptions.getMaxRequestsPerBatch());
  for (int i = 0; i < srcObjectNames.size(); i++) {
    final String srcObjectName = srcObjectNames.get(i);
    final String dstObjectName = dstObjectNames.get(i);
    Storage.Objects.Copy copyObject = gcs.objects().copy(
        srcBucketName, srcObjectName, dstBucketName, dstObjectName, null);
    batchHelper.queue(copyObject, new JsonBatchCallback<StorageObject>() {
      @Override
      public void onSuccess(StorageObject obj, HttpHeaders responseHeaders) {
        LOG.debug("Successfully copied {} to {}",
            StorageResourceId.createReadableString(srcBucketName, srcObjectName),
            StorageResourceId.createReadableString(dstBucketName, dstObjectName));
      }

      @Override
      public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) {
        if (errorExtractor.itemNotFound(e)) {
          LOG.debug("copy({}) : not found",
              StorageResourceId.createReadableString(srcBucketName, srcObjectName));
          innerExceptions.add(GoogleCloudStorageExceptions.getFileNotFoundException(
              srcBucketName, srcObjectName));
        } else {
          innerExceptions.add(wrapException(new IOException(e.toString()),
              "Error copying", srcBucketName,
srcObjectName));
        }
      }
    });
  }
  // Execute any remaining requests not divisible by the max batch size.
  batchHelper.flush();

  if (innerExceptions.size() > 0) {
    throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions);
  }
}

/**
 * Shared helper for actually dispatching buckets().list() API calls and accumulating paginated
 * results; these can then be used to either extract just their names, or to parse into full
 * GoogleCloudStorageItemInfos.
 */
private List<Bucket> listBucketsInternal() throws IOException {
  LOG.debug("listBucketsInternal()");
  List<Bucket> allBuckets = new ArrayList<>();
  Storage.Buckets.List listBucket = gcs.buckets().list(storageOptions.getProjectId());

  // Set number of items to retrieve per call.
  listBucket.setMaxResults(storageOptions.getMaxListItemsPerCall());

  // Loop till we fetch all items, following nextPageToken between calls.
  String pageToken = null;
  do {
    if (pageToken != null) {
      LOG.debug("listBucketsInternal: next page {}", pageToken);
      listBucket.setPageToken(pageToken);
    }
    Buckets items = listBucket.execute();

    // Accumulate buckets (if any); getItems() may be null on an empty page.
    List<Bucket> buckets = items.getItems();
    if (buckets != null) {
      LOG.debug("listed {} items", buckets.size());
      allBuckets.addAll(buckets);
    }
    pageToken = items.getNextPageToken();
  } while (pageToken != null);
  return allBuckets;
}

/**
 * See {@link GoogleCloudStorage#listBucketNames()} for details about expected behavior.
 */
@Override
public List<String> listBucketNames() throws IOException {
  LOG.debug("listBucketNames()");
  List<String> bucketNames = new ArrayList<>();
  List<Bucket> allBuckets = listBucketsInternal();
  for (Bucket bucket : allBuckets) {
    bucketNames.add(bucket.getName());
  }
  return bucketNames;
}

/**
 * See {@link GoogleCloudStorage#listBucketInfo()} for details about expected behavior.
*/ @Override public List<GoogleCloudStorageItemInfo> listBucketInfo() throws IOException { LOG.debug("listBucketInfo()"); List<GoogleCloudStorageItemInfo> bucketInfos = new ArrayList<>(); List<Bucket> allBuckets = listBucketsInternal(); for (Bucket bucket : allBuckets) { bucketInfos.add(new GoogleCloudStorageItemInfo(new StorageResourceId(bucket.getName()), bucket.getTimeCreated().getValue(), 0, bucket.getLocation(), bucket.getStorageClass())); } return bucketInfos; } /** * Helper for creating a Storage.Objects.Copy object ready for dispatch given a bucket and * object for an empty object to be created. Caller must already verify that {@code resourceId} * represents a StorageObject and not a bucket. */ private Storage.Objects.Insert prepareEmptyInsert(StorageResourceId resourceId, CreateObjectOptions createObjectOptions) throws IOException { StorageObject object = new StorageObject(); object.setName(resourceId.getObjectName()); Map<String, String> rewrittenMetadata = encodeMetadata(createObjectOptions.getMetadata()); object.setMetadata(rewrittenMetadata); // Ideally we'd use EmptyContent, but Storage requires an AbstractInputStreamContent and not // just an HttpContent, so we'll just use the next easiest thing. 
ByteArrayContent emptyContent = new ByteArrayContent(createObjectOptions.getContentType(), new byte[0]); Storage.Objects.Insert insertObject = gcs.objects().insert(resourceId.getBucketName(), object, emptyContent); insertObject.setDisableGZipContent(true); clientRequestHelper.setDirectUploadEnabled(insertObject, true); if (resourceId.hasGenerationId()) { insertObject.setIfGenerationMatch(resourceId.getGenerationId()); } else if (!createObjectOptions.overwriteExisting()) { insertObject.setIfGenerationMatch(0L); } return insertObject; } /** * Helper for both listObjectNames and listObjectInfo that executes the actual API calls to * get paginated lists, accumulating the StorageObjects and String prefixes into the params * {@code listedObjects} and {@code listedPrefixes}. * * @param bucketName bucket name * @param objectNamePrefix object name prefix or null if all objects in the bucket are desired * @param delimiter delimiter to use (typically "/"), otherwise null * @param maxResults maximum number of results to return (total of both * listedObject and listedPrefixes), unlimited if negative or zero * @param listedObjects output parameter into which retrieved StorageObjects will be added * @param listedPrefixes output parameter into which retrieved prefixes will be added */ private void listStorageObjectsAndPrefixes(String bucketName, String objectNamePrefix, String delimiter, long maxResults, List<StorageObject> listedObjects, List<String> listedPrefixes) throws IOException { LOG.debug("listStorageObjectsAndPrefixes({}, {}, {}, {})", bucketName, objectNamePrefix, delimiter, maxResults); Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), "bucketName must not be null or empty"); Preconditions.checkArgument(listedObjects != null, "Must provide a non-null container for listedObjects."); Preconditions.checkArgument(listedPrefixes != null, "Must provide a non-null container for listedPrefixes."); Preconditions.checkArgument(listedObjects.size() == 0, "Must 
provide an empty container for listedObjects."); Preconditions.checkArgument(listedPrefixes.size() == 0, "Must provide an empty container for listedPrefixes."); Storage.Objects.List listObject = gcs.objects().list(bucketName); // Set delimiter if supplied. if (delimiter != null) { listObject.setDelimiter(delimiter); } // Set number of items to retrieve per call. if (maxResults <= 0 || maxResults + 1 >= storageOptions.getMaxListItemsPerCall()) { listObject.setMaxResults(storageOptions.getMaxListItemsPerCall()); } else { // We add one in case we filter out objectNamePrefix. listObject.setMaxResults(maxResults + 1); } // Set prefix if supplied. if (!Strings.isNullOrEmpty(objectNamePrefix)) { listObject.setPrefix(objectNamePrefix); } // Loop till we fetch all items. String pageToken = null; Objects items; long numResults = listedObjects.size() + listedPrefixes.size(); long maxRemainingResults = maxResults - numResults; do { if (pageToken != null) { LOG.debug("listStorageObjectsAndPrefixes: next page {}", pageToken); listObject.setPageToken(pageToken); } try { items = listObject.execute(); } catch (IOException e) { if (errorExtractor.itemNotFound(e)) { LOG.debug("listStorageObjectsAndPrefixes({}, {}, {}, {}): item not found", bucketName, objectNamePrefix, delimiter, maxResults); break; } else { throw wrapException(e, "Error listing", bucketName, objectNamePrefix); } } // Add prefixes (if any). 
List<String> prefixes = items.getPrefixes(); if (prefixes != null) { LOG.debug("listed {} prefixes", prefixes.size()); numResults = listedObjects.size() + listedPrefixes.size(); maxRemainingResults = maxResults - numResults; if (maxResults <= 0 || maxRemainingResults >= prefixes.size()) { listedPrefixes.addAll(prefixes); } else { for (int ii = 0; ii < maxRemainingResults; ii++) { listedPrefixes.add(prefixes.get(ii)); } } } numResults = listedObjects.size() + listedPrefixes.size(); maxRemainingResults = maxResults - numResults; if (maxResults > 0 && maxRemainingResults <= 0) { break; } // Add object names (if any). List<StorageObject> objects = items.getItems(); if (objects != null) { LOG.debug("listed {} objects", objects.size()); // Although GCS does not implement a file system, it treats objects that end // in delimiter as different from other objects when listing objects. // // If caller sends foo/ as the prefix, foo/ is returned as an object name. // That is inconsistent with listing items in a directory. // Not sure if that is a bug in GCS or the intended behavior. // // In this case, we do not want foo/ in the returned list because we want to // keep the behavior more like a file system without calling it as such. // Therefore, we filter out such entry. // Determine if the caller sent a directory name as a prefix. 
boolean objectPrefixEndsWithDelimiter = !Strings.isNullOrEmpty(objectNamePrefix) && objectNamePrefix.endsWith(PATH_DELIMITER); for (StorageObject object : objects) { String objectName = object.getName(); if (!objectPrefixEndsWithDelimiter || (objectPrefixEndsWithDelimiter && !objectName.equals(objectNamePrefix))) { if (maxResults <= 0 || maxRemainingResults > 0) { listedObjects.add(object); maxRemainingResults--; } else { break; } } } } numResults = listedObjects.size() + listedPrefixes.size(); maxRemainingResults = maxResults - numResults; if (maxResults > 0 && maxRemainingResults <= 0) { break; } pageToken = items.getNextPageToken(); } while (pageToken != null); } /** * See {@link GoogleCloudStorage#listObjectNames(String, String, String)} * for details about expected behavior. */ @Override public List<String> listObjectNames(String bucketName, String objectNamePrefix, String delimiter) throws IOException { return listObjectNames(bucketName, objectNamePrefix, delimiter, GoogleCloudStorage.MAX_RESULTS_UNLIMITED); } /** * See {@link GoogleCloudStorage#listObjectNames(String, String, String, long)} * for details about expected behavior. */ @Override public List<String> listObjectNames(String bucketName, String objectNamePrefix, String delimiter, long maxResults) throws IOException { LOG.debug("listObjectNames({}, {}, {}, {})", bucketName, objectNamePrefix, delimiter, maxResults); // Helper will handle going through pages of list results and accumulating them. List<StorageObject> listedObjects = new ArrayList<>(); List<String> listedPrefixes = new ArrayList<>(); listStorageObjectsAndPrefixes(bucketName, objectNamePrefix, delimiter, maxResults, listedObjects, listedPrefixes); // Just use the prefix list as a starting point, and extract all the names from the // StorageObjects, adding them to the list. // TODO(user): Maybe de-dupe if it's possible for GCS to return duplicates. 
List<String> objectNames = listedPrefixes; for (StorageObject obj : listedObjects) { objectNames.add(obj.getName()); } return objectNames; } /** * See {@link GoogleCloudStorage#listObjectInfo(String, String, String)} * for details about expected behavior. */ @Override public List<GoogleCloudStorageItemInfo> listObjectInfo(final String bucketName, String objectNamePrefix, String delimiter) throws IOException { return listObjectInfo(bucketName, objectNamePrefix, delimiter, GoogleCloudStorage.MAX_RESULTS_UNLIMITED); } /** * See {@link GoogleCloudStorage#listObjectInfo(String, String, String, long)} * for details about expected behavior. */ @Override public List<GoogleCloudStorageItemInfo> listObjectInfo(final String bucketName, String objectNamePrefix, String delimiter, long maxResults) throws IOException { LOG.debug("listObjectInfo({}, {}, {}, {})", bucketName, objectNamePrefix, delimiter, maxResults); // Helper will handle going through pages of list results and accumulating them. List<StorageObject> listedObjects = new ArrayList<>(); List<String> listedPrefixes = new ArrayList<>(); listStorageObjectsAndPrefixes(bucketName, objectNamePrefix, delimiter, maxResults, listedObjects, listedPrefixes); // For the listedObjects, we simply parse each item into a GoogleCloudStorageItemInfo without // further work. List<GoogleCloudStorageItemInfo> objectInfos = new ArrayList<>(); for (StorageObject obj : listedObjects) { objectInfos.add(createItemInfoForStorageObject(new StorageResourceId(bucketName, obj.getName()), obj)); } if (listedPrefixes.size() > 0) { // Send requests to fetch info about the directories associated with each prefix in batch // requests, maxRequestsPerBatch at a time. 
List<StorageResourceId> resourceIdsForPrefixes = new ArrayList<>(); for (String prefix : listedPrefixes) { resourceIdsForPrefixes.add(new StorageResourceId(bucketName, prefix)); } List<GoogleCloudStorageItemInfo> prefixInfos = getItemInfos(resourceIdsForPrefixes); List<StorageResourceId> repairList = new ArrayList<>(); for (GoogleCloudStorageItemInfo prefixInfo : prefixInfos) { if (prefixInfo.exists()) { objectInfos.add(prefixInfo); } else { // This indicates a likely "implicit directory" due to a StorageObject lacking a GHFS- // created parent directory. String errorBase = String.format( "Error retrieving object for a retrieved prefix with resourceId '%s'. ", prefixInfo.getResourceId()); if (storageOptions.isAutoRepairImplicitDirectoriesEnabled()) { LOG.debug(errorBase + "Attempting to repair missing directory."); repairList.add(prefixInfo.getResourceId()); } else if (storageOptions.isInferImplicitDirectoriesEnabled()) { objectInfos.add(createItemInfoForInferredDirectory(prefixInfo.getResourceId())); } else { LOG.error(errorBase + "Giving up on retrieving missing directory."); } } } // Handle repairs. if (storageOptions.isAutoRepairImplicitDirectoriesEnabled() && !repairList.isEmpty()) { try { LOG.warn("Repairing batch of {} missing directories.", repairList.size()); if (repairList.size() == 1) { createEmptyObject(repairList.get(0)); } else { createEmptyObjects(repairList); } // Fetch and append all the repaired metadatas. 
List<GoogleCloudStorageItemInfo> repairedInfos = getItemInfos(repairList); int numRepaired = 0; for (GoogleCloudStorageItemInfo repairedInfo : repairedInfos) { if (repairedInfo.exists()) { objectInfos.add(repairedInfo); ++numRepaired; } else { LOG.warn("Somehow the repair for '{}' failed quietly", repairedInfo.getResourceId()); if (storageOptions.isInferImplicitDirectoriesEnabled()) { objectInfos.add(createItemInfoForInferredDirectory(repairedInfo.getResourceId())); } } } LOG.warn("Successfully repaired {}/{} implicit directories.", numRepaired, repairList.size()); } catch (IOException ioe) { // Don't totally fail the listObjectInfo call, since auto-repair is best-effort // anyways. LOG.error("Failed to repair some missing directories.", ioe); if (storageOptions.isInferImplicitDirectoriesEnabled()) { // If we have both auto-repair and auto-infer set, and we fail // to repair everything, then infer what was not repaired. List<GoogleCloudStorageItemInfo> repairedInfos = getItemInfos(repairList); int numRepaired = 0; for (GoogleCloudStorageItemInfo repairedInfo : repairedInfos) { if (repairedInfo.exists()) { objectInfos.add(repairedInfo); ++numRepaired; } else { LOG.info("Repair for '{}' failed, using inferred directory", repairedInfo.getResourceId()); objectInfos.add(createItemInfoForInferredDirectory(repairedInfo.getResourceId())); } } if (numRepaired > 0) { LOG.info("Successfully repaired {}/{} implicit directories.", numRepaired, repairList.size()); } } } } } return objectInfos; } /** * Helper for converting a StorageResourceId + Bucket into a GoogleCloudStorageItemInfo. */ public static GoogleCloudStorageItemInfo createItemInfoForBucket(StorageResourceId resourceId, Bucket bucket) { Preconditions.checkArgument(resourceId != null, "resourceId must not be null"); Preconditions.checkArgument(bucket != null, "bucket must not be null"); Preconditions.checkArgument(resourceId.isBucket(), String.format("resourceId must be a Bucket. 
resourceId: %s", resourceId)); Preconditions.checkArgument(resourceId.getBucketName().equals(bucket.getName()), String.format("resourceId.getBucketName() must equal bucket.getName(): '%s' vs '%s'", resourceId.getBucketName(), bucket.getName())); // For buckets, size is 0. return new GoogleCloudStorageItemInfo(resourceId, bucket.getTimeCreated().getValue(), 0, bucket.getLocation(), bucket.getStorageClass()); } /** * Helper for converting a StorageResourceId + StorageObject into a GoogleCloudStorageItemInfo. */ public static GoogleCloudStorageItemInfo createItemInfoForStorageObject(StorageResourceId resourceId, StorageObject object) { Preconditions.checkArgument(resourceId != null, "resourceId must not be null"); Preconditions.checkArgument(object != null, "object must not be null"); Preconditions.checkArgument(resourceId.isStorageObject(), String.format("resourceId must be a StorageObject. resourceId: %s", resourceId)); Preconditions.checkArgument(resourceId.getBucketName().equals(object.getBucket()), String.format("resourceId.getBucketName() must equal object.getBucket(): '%s' vs '%s'", resourceId.getBucketName(), object.getBucket())); Preconditions.checkArgument(resourceId.getObjectName().equals(object.getName()), String.format("resourceId.getObjectName() must equal object.getName(): '%s' vs '%s'", resourceId.getObjectName(), object.getName())); Map<String, byte[]> decodedMetadata = object.getMetadata() == null ? null : decodeMetadata(object.getMetadata()); byte[] md5Hash = null; byte[] crc32c = null; if (!Strings.isNullOrEmpty(object.getCrc32c())) { crc32c = BaseEncoding.base64().decode(object.getCrc32c()); } if (!Strings.isNullOrEmpty(object.getMd5Hash())) { md5Hash = BaseEncoding.base64().decode(object.getMd5Hash()); } // GCS API does not make available location and storage class at object level at present // (it is same for all objects in a bucket). Further, we do not use the values for objects. 
// The GoogleCloudStorageItemInfo thus has 'null' for location and storage class. return new GoogleCloudStorageItemInfo(resourceId, object.getUpdated().getValue(), object.getSize().longValue(), null, null, object.getContentType(), decodedMetadata, object.getGeneration(), object.getMetageneration(), new VerificationAttributes(md5Hash, crc32c)); } /** * Helper for converting from a Map<String, byte[]> metadata map that may be in a * StorageObject into a Map<String, String> suitable for placement inside a * GoogleCloudStorageItemInfo. */ @VisibleForTesting static Map<String, String> encodeMetadata(Map<String, byte[]> metadata) { return Maps.transformValues(metadata, ENCODE_METADATA_VALUES); } /** * Inverse function of {@link #encodeMetadata(Map)}. */ @VisibleForTesting static Map<String, byte[]> decodeMetadata(Map<String, String> metadata) { return Maps.transformValues(metadata, DECODE_METADATA_VALUES); } /** * Helper for creating a "found" GoogleCloudStorageItemInfo * for an inferred directory. */ @VisibleForTesting public static GoogleCloudStorageItemInfo createItemInfoForInferredDirectory(StorageResourceId resourceId) { Preconditions.checkArgument(resourceId != null, "resourceId must not be null"); // Return size == 0, creationTime == 0, // location == storageClass == null for an inferred directory object. return new GoogleCloudStorageItemInfo(resourceId, 0, 0, null, null); } /** * Helper for creating a "not found" GoogleCloudStorageItemInfo for a StorageResourceId. */ public static GoogleCloudStorageItemInfo createItemInfoForNotFound(StorageResourceId resourceId) { Preconditions.checkArgument(resourceId != null, "resourceId must not be null"); // Return size == -1, creationTime == 0, location == storageClass == null for a not-found // Bucket or StorageObject. return new GoogleCloudStorageItemInfo(resourceId, 0, -1, null, null); } /** * See {@link GoogleCloudStorage#getItemInfos(List<StorageResourceId>)} for details about expected * behavior. 
*/ @Override public List<GoogleCloudStorageItemInfo> getItemInfos(List<StorageResourceId> resourceIds) throws IOException { LOG.debug("getItemInfos({})", resourceIds.toString()); final Map<StorageResourceId, GoogleCloudStorageItemInfo> itemInfos = new HashMap<>(); final List<IOException> innerExceptions = new ArrayList<>(); BatchHelper batchHelper = batchFactory.newBatchHelper(httpRequestInitializer, gcs, storageOptions.getMaxRequestsPerBatch()); // For each resourceId, we'll either directly add ROOT_INFO, enqueue a Bucket fetch request, or // enqueue a StorageObject fetch request. for (final StorageResourceId resourceId : resourceIds) { if (resourceId.isRoot()) { itemInfos.put(resourceId, GoogleCloudStorageItemInfo.ROOT_INFO); } else if (resourceId.isBucket()) { batchHelper.queue(gcs.buckets().get(resourceId.getBucketName()), new JsonBatchCallback<Bucket>() { @Override public void onSuccess(Bucket bucket, HttpHeaders responseHeaders) { LOG.debug("getItemInfos: Successfully fetched bucket: {} for resourceId: {}", bucket, resourceId); itemInfos.put(resourceId, createItemInfoForBucket(resourceId, bucket)); } @Override public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) { if (errorExtractor.itemNotFound(e)) { LOG.debug("getItemInfos: bucket not found: {}", resourceId.getBucketName()); itemInfos.put(resourceId, createItemInfoForNotFound(resourceId)); } else { innerExceptions.add(wrapException(new IOException(e.toString()), "Error getting Bucket: ", resourceId.getBucketName(), null)); } } }); } else { final String bucketName = resourceId.getBucketName(); final String objectName = resourceId.getObjectName(); batchHelper.queue(gcs.objects().get(bucketName, objectName), new JsonBatchCallback<StorageObject>() { @Override public void onSuccess(StorageObject obj, HttpHeaders responseHeaders) { LOG.debug("getItemInfos: Successfully fetched object '{}' for resourceId '{}'", obj, resourceId); itemInfos.put(resourceId, createItemInfoForStorageObject(resourceId, 
obj)); } @Override public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) { if (errorExtractor.itemNotFound(e)) { LOG.debug("getItemInfos: object not found: {}", resourceId); itemInfos.put(resourceId, createItemInfoForNotFound(resourceId)); } else { innerExceptions.add(wrapException(new IOException(e.toString()), "Error getting StorageObject: ", bucketName, objectName)); } } }); } } batchHelper.flush(); if (innerExceptions.size() > 0) { throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions); } // Assemble the return list in the same order as the input arguments. List<GoogleCloudStorageItemInfo> sortedItemInfos = new ArrayList<>(); for (StorageResourceId resourceId : resourceIds) { Preconditions.checkState(itemInfos.containsKey(resourceId), String.format("Somehow missing resourceId '%s' from map: %s", resourceId, itemInfos)); sortedItemInfos.add(itemInfos.get(resourceId)); } // We expect the return list to be the same size, even if some entries were "not found". Preconditions.checkState(sortedItemInfos.size() == resourceIds.size(), String.format("sortedItemInfos.size() (%d) != resourceIds.size() (%d). 
infos: %s, ids: %s", sortedItemInfos.size(), resourceIds.size(), sortedItemInfos, resourceIds)); return sortedItemInfos; } @Override public List<GoogleCloudStorageItemInfo> updateItems(List<UpdatableItemInfo> itemInfoList) throws IOException { LOG.debug("updateItems({})", itemInfoList.toString()); final Map<StorageResourceId, GoogleCloudStorageItemInfo> resultItemInfos = new HashMap<>(); final List<IOException> innerExceptions = new ArrayList<>(); BatchHelper batchHelper = batchFactory.newBatchHelper(httpRequestInitializer, gcs, storageOptions.getMaxRequestsPerBatch()); for (UpdatableItemInfo itemInfo : itemInfoList) { Preconditions.checkArgument( !itemInfo.getStorageResourceId().isBucket() && !itemInfo.getStorageResourceId().isRoot(), "Buckets and GCS Root resources are not supported for updateItems"); } for (final UpdatableItemInfo itemInfo : itemInfoList) { final StorageResourceId resourceId = itemInfo.getStorageResourceId(); final String bucketName = resourceId.getBucketName(); final String objectName = resourceId.getObjectName(); Map<String, byte[]> originalMetadata = itemInfo.getMetadata(); Map<String, String> rewrittenMetadata = encodeMetadata(originalMetadata); Storage.Objects.Patch patch = gcs.objects().patch(bucketName, objectName, new StorageObject().setMetadata(rewrittenMetadata)); batchHelper.queue(patch, new JsonBatchCallback<StorageObject>() { @Override public void onSuccess(StorageObject obj, HttpHeaders responseHeaders) { LOG.debug("updateItems: Successfully updated object '{}' for resourceId '{}'", obj, resourceId); resultItemInfos.put(resourceId, createItemInfoForStorageObject(resourceId, obj)); } @Override public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) { if (errorExtractor.itemNotFound(e)) { LOG.debug("updateItems: object not found: {}", resourceId); resultItemInfos.put(resourceId, createItemInfoForNotFound(resourceId)); } else { innerExceptions.add(wrapException(new IOException(e.toString()), "Error getting StorageObject: 
", bucketName, objectName)); } } }); } batchHelper.flush(); if (innerExceptions.size() > 0) { throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions); } // Assemble the return list in the same order as the input arguments. List<GoogleCloudStorageItemInfo> sortedItemInfos = new ArrayList<>(); for (UpdatableItemInfo itemInfo : itemInfoList) { Preconditions.checkState(resultItemInfos.containsKey(itemInfo.getStorageResourceId()), String.format( "Missing resourceId '%s' from map: %s", itemInfo.getStorageResourceId(), resultItemInfos)); sortedItemInfos.add(resultItemInfos.get(itemInfo.getStorageResourceId())); } // We expect the return list to be the same size, even if some entries were "not found". Preconditions.checkState(sortedItemInfos.size() == itemInfoList.size(), String.format( "sortedItemInfos.size() (%d) != resourceIds.size() (%d). infos: %s, updateItemInfos: %s", sortedItemInfos.size(), itemInfoList.size(), sortedItemInfos, itemInfoList)); return sortedItemInfos; } /** * See {@link GoogleCloudStorage#getItemInfo(StorageResourceId)} for details about expected * behavior. */ @Override public GoogleCloudStorageItemInfo getItemInfo(StorageResourceId resourceId) throws IOException { LOG.debug("getItemInfo({})", resourceId); // Handle ROOT case first. if (resourceId.isRoot()) { return GoogleCloudStorageItemInfo.ROOT_INFO; } GoogleCloudStorageItemInfo itemInfo = null; // Determine object size. // // For buckets, size is 0. // For objects not found, size is -1. // For objects that exist, size is in number of bytes. 
if (resourceId.isBucket()) { Bucket bucket = getBucket(resourceId.getBucketName()); if (bucket != null) { itemInfo = createItemInfoForBucket(resourceId, bucket); } } else { StorageObject object = getObject(resourceId); if (object != null) { itemInfo = createItemInfoForStorageObject(resourceId, object); } } if (itemInfo == null) { itemInfo = createItemInfoForNotFound(resourceId); } LOG.debug("getItemInfo: {}", itemInfo); return itemInfo; } /** * See {@link GoogleCloudStorage#close()} for details about expected behavior. */ @Override public void close() { // Calling shutdown() is a no-op if it was already called earlier, // therefore no need to guard against that by setting threadPool to null. LOG.debug("close()"); threadPool.shutdown(); manualBatchingThreadPool.shutdown(); } /** * Gets the bucket with the given name. * * @param bucketName name of the bucket to get * @return the bucket with the given name or null if bucket not found * @throws IOException if the bucket exists but cannot be accessed */ private Bucket getBucket(String bucketName) throws IOException { LOG.debug("getBucket({})", bucketName); Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), "bucketName must not be null or empty"); Bucket bucket = null; Storage.Buckets.Get getBucket = gcs.buckets().get(bucketName); try { bucket = getBucket.execute(); } catch (IOException e) { if (errorExtractor.itemNotFound(e)) { LOG.debug("getBucket({}) : not found", bucketName); } else { LOG.debug(String.format("getBucket(%s) threw exception: ", bucketName), e); throw wrapException(e, "Error accessing", bucketName, null); } } return bucket; } /** * Gets the object generation for a Write operation * * @param resourceId object for which generation info is requested * @return the generation of the object * @throws IOException if the object already exists and cannot be overwritten */ private long getWriteGeneration(StorageResourceId resourceId, boolean overwritable) throws IOException { 
LOG.debug("getWriteGeneration({}, {})", resourceId, overwritable); GoogleCloudStorageItemInfo info = getItemInfo(resourceId); if (!info.exists()) { return 0L; } else if (info.exists() && overwritable) { long generation = info.getContentGeneration(); Preconditions.checkState(generation != 0, "Generation should not be 0 for an existing item"); return generation; } else { throw new FileAlreadyExistsException(String.format("Object %s already exists.", resourceId.toString())); } } /** * Wraps the given IOException into another IOException, * adding the given error message and a reference to the supplied * bucket and object. It allows one to know which bucket and object * were being accessed when the exception occurred for an operation. */ @VisibleForTesting IOException wrapException(IOException e, String message, String bucketName, String objectName) { String name = "bucket: " + bucketName; if (!Strings.isNullOrEmpty(objectName)) { name += ", object: " + objectName; } String fullMessage = String.format("%s: %s", message, name); return new IOException(fullMessage, e); } /** * Gets the object with the given resourceId. 
* * @param resourceId identifies a StorageObject * @return the object with the given name or null if object not found * @throws IOException if the object exists but cannot be accessed */ private StorageObject getObject(StorageResourceId resourceId) throws IOException { LOG.debug("getObject({})", resourceId); Preconditions.checkArgument(resourceId.isStorageObject(), "Expected full StorageObject id, got " + resourceId); String bucketName = resourceId.getBucketName(); String objectName = resourceId.getObjectName(); StorageObject object = null; Storage.Objects.Get getObject = gcs.objects().get(bucketName, objectName); try { object = getObject.execute(); } catch (IOException e) { if (errorExtractor.itemNotFound(e)) { LOG.debug("getObject({}) : not found", resourceId); } else { LOG.debug(String.format("getObject(%s) threw exception: ", resourceId), e); throw wrapException(e, "Error accessing", bucketName, objectName); } } return object; } /** * Helper to check whether an empty object already exists with the expected metadata specified * in {@code options}, to be used to determine whether it's safe to ignore an exception that * was thrown when trying to create the object, {@code exceptionOnCreate}. */ private boolean canIgnoreExceptionForEmptyObject(IOException exceptionOnCreate, StorageResourceId resourceId, CreateObjectOptions options) throws IOException { // TODO(user): Maybe also add 5xx, 409, and even 412 errors if they pop up in this use case. if (errorExtractor.rateLimited(exceptionOnCreate)) { // Try to re-fetch to see if our work might already be done. GoogleCloudStorageItemInfo existingInfo = getItemInfo(resourceId); // Compare existence, size, and metadata; for 429 errors creating an empty object, // we don't care about metaGeneration/contentGeneration as long as the metadata // matches, since we don't know for sure whether our low-level request succeeded // first or some other client succeeded first. 
if (existingInfo.exists() && existingInfo.getSize() == 0) { if (!options.getRequireMetadataMatchForEmptyObjects()) { return true; } else if (existingInfo.metadataEquals(options.getMetadata())) { return true; } } } return false; } /** * See {@link GoogleCloudStorage#waitForBucketEmpty(String)} for details about expected behavior. */ @Override public void waitForBucketEmpty(String bucketName) throws IOException { Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), "bucketName must not be null or empty"); int maxRetries = BUCKET_EMPTY_MAX_RETRIES; int waitTime = BUCKET_EMPTY_WAIT_TIME_MS; // milliseconds for (int i = 0; i < maxRetries; i++) { // We only need one item to see the bucket is not yet empty. List<String> objectNames = listObjectNames(bucketName, null, PATH_DELIMITER, 1); if (objectNames.size() == 0) { return; } try { sleeper.sleep(waitTime); } catch (InterruptedException ignored) { // Ignore the exception and loop. } } throw new IOException("Internal error: bucket not empty: " + bucketName); } @Override public void compose(final String bucketName, List<String> sources, String destination, String contentType) throws IOException { LOG.debug("compose({}, {}, {}, {})", bucketName, sources, destination, contentType); List<StorageResourceId> sourceIds = Lists.transform(sources, new Function<String, StorageResourceId>() { @Override public StorageResourceId apply(String objectName) { return new StorageResourceId(bucketName, objectName); } }); StorageResourceId destinationId = new StorageResourceId(bucketName, destination); CreateObjectOptions options = new CreateObjectOptions(true, contentType, CreateObjectOptions.EMPTY_METADATA); composeObjects(sourceIds, destinationId, options); } @Override public GoogleCloudStorageItemInfo composeObjects(List<StorageResourceId> sources, final StorageResourceId destination, CreateObjectOptions options) throws IOException { LOG.debug("composeObjects({}, {}, {})", sources, destination, options); for (StorageResourceId 
inputId : sources) { if (!destination.getBucketName().equals(inputId.getBucketName())) { throw new IOException(String.format("Bucket doesn't match for source '%s' and destination '%s'!", inputId, destination)); } } List<SourceObjects> sourceObjects = Lists.transform(sources, new Function<StorageResourceId, SourceObjects>() { @Override public SourceObjects apply(StorageResourceId input) { // TODO(user): Maybe set generationIds for source objects as well here. return new SourceObjects().setName(input.getObjectName()); } }); Compose compose = gcs.objects().compose(destination.getBucketName(), destination.getObjectName(), new ComposeRequest().setSourceObjects(sourceObjects) .setDestination(new StorageObject().setContentType(options.getContentType()) .setMetadata(encodeMetadata(options.getMetadata())))); if (destination.hasGenerationId()) { compose.setIfGenerationMatch(destination.getGenerationId()); } else { compose.setIfGenerationMatch(getWriteGeneration(destination, true)); } LOG.debug("composeObjects.execute()"); GoogleCloudStorageItemInfo compositeInfo = createItemInfoForStorageObject(destination, compose.execute()); LOG.debug("composeObjects() done, returning: {}", compositeInfo); return compositeInfo; } }