Java tutorial: COSAPIClient.java, the Stocator client for IBM Cloud Object Storage (COS) over the S3 API
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * (C) Copyright IBM Corp. 2015, 2016 */ package com.ibm.stocator.fs.cos; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; import java.io.OutputStream; import java.net.URI; import java.util.Map; import java.util.List; import java.util.Properties; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.ArrayList; import java.util.HashMap; import java.util.Date; import com.ibm.stocator.fs.cache.MemoryCache; import com.ibm.stocator.fs.common.Constants; import com.ibm.stocator.fs.common.IStoreClient; import com.ibm.stocator.fs.common.StocatorPath; import com.ibm.stocator.fs.common.Utils; import com.ibm.stocator.fs.common.exception.ConfigurationParseException; import com.ibm.stocator.fs.cos.ConfigurationHandler; import com.ibm.stocator.fs.cos.OnetimeInitialization; import com.ibm.stocator.fs.cos.COSInputStream; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.s3.S3ClientOptions; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.PutObjectResult; import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; import com.amazonaws.services.s3.model.AmazonS3Exception; import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.UploadPartRequest; import com.amazonaws.services.s3.model.UploadPartResult; import com.amazonaws.services.s3.model.PartETag; import com.amazonaws.services.s3.model.DeleteObjectRequest; import com.amazonaws.services.s3.model.ListObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.TransferManagerConfiguration; import com.amazonaws.services.s3.transfer.Upload; import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ListeningExecutorService; import com.amazonaws.ClientConfiguration; import com.amazonaws.Protocol; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import static com.ibm.stocator.fs.common.Constants.HADOOP_SUCCESS; import static com.ibm.stocator.fs.common.Constants.HADOOP_TEMPORARY; import static com.ibm.stocator.fs.common.Constants.HADOOP_ATTEMPT; import static com.ibm.stocator.fs.common.Constants.CACHE_SIZE; import static com.ibm.stocator.fs.common.Constants.GUAVA_CACHE_SIZE_DEFAULT; import static com.ibm.stocator.fs.cos.COSConstants.CLIENT_EXEC_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_CLIENT_EXEC_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_ESTABLISH_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_KEEPALIVE_TIME; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MAXIMUM_CONNECTIONS; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MAX_ERROR_RETRIES; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MAX_THREADS; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MAX_TOTAL_TASKS; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MIN_MULTIPART_THRESHOLD; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MULTIPART_SIZE; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_REQUEST_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_SECURE_CONNECTIONS; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_SOCKET_RECV_BUFFER; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_SOCKET_SEND_BUFFER; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_SOCKET_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.FS_COS; import static com.ibm.stocator.fs.cos.COSConstants.FS_S3_A; import static com.ibm.stocator.fs.cos.COSConstants.FS_S3_D; import static com.ibm.stocator.fs.cos.COSConstants.ESTABLISH_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.KEEPALIVE_TIME; import static com.ibm.stocator.fs.cos.COSConstants.MAXIMUM_CONNECTIONS; import static com.ibm.stocator.fs.cos.COSConstants.MAX_ERROR_RETRIES; import static com.ibm.stocator.fs.cos.COSConstants.MAX_THREADS; import static com.ibm.stocator.fs.cos.COSConstants.MAX_TOTAL_TASKS; import static com.ibm.stocator.fs.cos.COSConstants.MIN_MULTIPART_THRESHOLD; import static com.ibm.stocator.fs.cos.COSConstants.MULTIPART_SIZE; import static com.ibm.stocator.fs.cos.COSConstants.PROXY_DOMAIN; import static com.ibm.stocator.fs.cos.COSConstants.PROXY_HOST; import static com.ibm.stocator.fs.cos.COSConstants.PROXY_PASSWORD; import static com.ibm.stocator.fs.cos.COSConstants.PROXY_PORT; import static com.ibm.stocator.fs.cos.COSConstants.PROXY_USERNAME; import static com.ibm.stocator.fs.cos.COSConstants.PROXY_WORKSTATION; import static com.ibm.stocator.fs.cos.COSConstants.REQUEST_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.AUTO_BUCKET_CREATE_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.ACCESS_KEY_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.SECRET_KEY_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.BLOCK_SIZE_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.COS_BUCKET_PROPERTY; import static 
com.ibm.stocator.fs.cos.COSConstants.ENDPOINT_URL_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.FMODE_AUTOMATIC_DELETE_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.REGION_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.V2_SIGNER_TYPE_COS_PROPERTY; import static com.ibm.stocator.fs.cos.COSConstants.SECURE_CONNECTIONS; import static com.ibm.stocator.fs.cos.COSConstants.SIGNING_ALGORITHM; import static com.ibm.stocator.fs.cos.COSConstants.SOCKET_RECV_BUFFER; import static com.ibm.stocator.fs.cos.COSConstants.SOCKET_SEND_BUFFER; import static com.ibm.stocator.fs.cos.COSConstants.SOCKET_TIMEOUT; import static com.ibm.stocator.fs.cos.COSConstants.USER_AGENT_PREFIX; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_USER_AGENT_PREFIX; import static com.ibm.stocator.fs.cos.COSConstants.ENABLE_MULTI_DELETE; import static com.ibm.stocator.fs.cos.COSConstants.PURGE_EXISTING_MULTIPART; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_PURGE_EXISTING_MULTIPART; import static com.ibm.stocator.fs.cos.COSConstants.PURGE_EXISTING_MULTIPART_AGE; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_PURGE_EXISTING_MULTIPART_AGE; import static com.ibm.stocator.fs.cos.COSConstants.FAST_UPLOAD; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_FAST_UPLOAD; import static com.ibm.stocator.fs.cos.COSConstants.FAST_UPLOAD_BUFFER; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_FAST_UPLOAD_BUFFER; import static com.ibm.stocator.fs.cos.COSConstants.FAST_UPLOAD_ACTIVE_BLOCKS; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS; import static com.ibm.stocator.fs.cos.COSConstants.MAX_PAGING_KEYS; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_MAX_PAGING_KEYS; import static com.ibm.stocator.fs.cos.COSConstants.FLAT_LISTING; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_FLAT_LISTING; import static com.ibm.stocator.fs.cos.COSConstants.READAHEAD_RANGE; import static com.ibm.stocator.fs.cos.COSConstants.DEFAULT_READAHEAD_RANGE; import static com.ibm.stocator.fs.cos.COSConstants.INPUT_FADVISE; import static com.ibm.stocator.fs.cos.COSConstants.INPUT_FADV_NORMAL; import static com.ibm.stocator.fs.cos.COSUtils.translateException; public class COSAPIClient implements IStoreClient { /* * Logger */ private static final Logger LOG = LoggerFactory.getLogger(COSAPIClient.class); private static final String[] FS_ALT_KEYS = new String[] { FS_S3_A, FS_S3_D }; /* * root bucket */ private String mBucket; /* * COS client */ private AmazonS3 mClient; /* * block size */ private long mBlockSize; /* * If true, automatic delete will be activated on the data generated from * failed tasks */ private boolean fModeAutomaticDelete; /* * If true, V2 signer will be created for authentication otherwise V4 */ private boolean mIsV2Signer; /* * support for different schema models */ private String schemaProvided; /* * Contains map of objects that were created by successful jobs. Used in * container listing */ private Map<String, Boolean> mCachedSparkJobsStatus; /* * Contains map of object names that were created. 
Used in container * listing */ private Map<String, Boolean> mCachedSparkOriginated; private URI filesystemURI; private Configuration conf; private TransferManager transfers; private long partSize; private long multiPartThreshold; private ListeningExecutorService threadPoolExecutor; private ExecutorService unboundedThreadPool; private COSLocalDirAllocator directoryAllocator; private Path workingDir; private OnetimeInitialization singletoneInitTimeData; private boolean enableMultiObjectsDelete; private boolean blockUploadEnabled; private String blockOutputBuffer; private COSDataBlocks.BlockFactory blockFactory; private int blockOutputActiveBlocks; private MemoryCache memoryCache; private int maxKeys; private boolean flatListingFlag; private long readAhead; private COSInputPolicy inputPolicy; private int cacheSize; private final String amazonDefaultEndpoint = "s3.amazonaws.com"; private StocatorPath stocatorPath; public COSAPIClient(URI pFilesystemURI, Configuration pConf) throws IOException { filesystemURI = pFilesystemURI; conf = pConf; LOG.info("Init : {}", filesystemURI.toString()); singletoneInitTimeData = OnetimeInitialization.getInstance(); } @Override public void initiate(String scheme) throws IOException, ConfigurationParseException { mCachedSparkOriginated = new HashMap<String, Boolean>(); mCachedSparkJobsStatus = new HashMap<String, Boolean>(); schemaProvided = scheme; Properties props = ConfigurationHandler.initialize(filesystemURI, conf, scheme); // Set bucket name property int cacheSize = conf.getInt(CACHE_SIZE, GUAVA_CACHE_SIZE_DEFAULT); memoryCache = MemoryCache.getInstance(cacheSize); mBucket = props.getProperty(COS_BUCKET_PROPERTY); workingDir = new Path("/user", System.getProperty("user.name")).makeQualified(filesystemURI, getWorkingDirectory()); fModeAutomaticDelete = "true".equals(props.getProperty(FMODE_AUTOMATIC_DELETE_COS_PROPERTY, "false")); mIsV2Signer = "true".equals(props.getProperty(V2_SIGNER_TYPE_COS_PROPERTY, "false")); // Define COS client String accessKey = props.getProperty(ACCESS_KEY_COS_PROPERTY); String secretKey = props.getProperty(SECRET_KEY_COS_PROPERTY); if (accessKey == null) { throw new ConfigurationParseException("Access KEY is empty. Please provide valid access key"); } if (secretKey == null) { throw new ConfigurationParseException("Secret KEY is empty. Please provide valid secret key"); } BasicAWSCredentials creds = new BasicAWSCredentials(accessKey, secretKey); ClientConfiguration clientConf = new ClientConfiguration(); int maxThreads = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, MAX_THREADS, DEFAULT_MAX_THREADS); if (maxThreads < 2) { LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2."); maxThreads = 2; } int totalTasks = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS); long keepAliveTime = Utils.getLong(conf, FS_COS, FS_ALT_KEYS, KEEPALIVE_TIME, DEFAULT_KEEPALIVE_TIME); threadPoolExecutor = BlockingThreadPoolExecutorService.newInstance(maxThreads, maxThreads + totalTasks, keepAliveTime, TimeUnit.SECONDS, "s3a-transfer-shared"); unboundedThreadPool = new ThreadPoolExecutor(maxThreads, Integer.MAX_VALUE, keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), BlockingThreadPoolExecutorService.newDaemonThreadFactory("s3a-transfer-unbounded")); boolean secureConnections = Utils.getBoolean(conf, FS_COS, FS_ALT_KEYS, SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS); clientConf.setProtocol(secureConnections ? 
Protocol.HTTPS : Protocol.HTTP); String proxyHost = Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, PROXY_HOST, ""); int proxyPort = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, PROXY_PORT, -1); if (!proxyHost.isEmpty()) { clientConf.setProxyHost(proxyHost); if (proxyPort >= 0) { clientConf.setProxyPort(proxyPort); } else { if (secureConnections) { LOG.warn("Proxy host set without port. Using HTTPS default 443"); clientConf.setProxyPort(443); } else { LOG.warn("Proxy host set without port. Using HTTP default 80"); clientConf.setProxyPort(80); } } String proxyUsername = Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, PROXY_USERNAME); String proxyPassword = Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, PROXY_PASSWORD); if ((proxyUsername == null) != (proxyPassword == null)) { String msg = "Proxy error: " + PROXY_USERNAME + " or " + PROXY_PASSWORD + " set without the other."; LOG.error(msg); throw new IllegalArgumentException(msg); } clientConf.setProxyUsername(proxyUsername); clientConf.setProxyPassword(proxyPassword); clientConf.setProxyDomain(Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, PROXY_DOMAIN)); clientConf.setProxyWorkstation(Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, PROXY_WORKSTATION)); if (LOG.isDebugEnabled()) { LOG.debug( "Using proxy server {}:{} as user {} with password {} on " + "domain {} as workstation {}", clientConf.getProxyHost(), clientConf.getProxyPort(), String.valueOf(clientConf.getProxyUsername()), clientConf.getProxyPassword(), clientConf.getProxyDomain(), clientConf.getProxyWorkstation()); } } else if (proxyPort >= 0) { String msg = "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; LOG.error(msg); throw new IllegalArgumentException(msg); } initConnectionSettings(conf, clientConf); if (mIsV2Signer) { clientConf.withSignerOverride("S3SignerType"); } mClient = new AmazonS3Client(creds, clientConf); final String serviceUrl = props.getProperty(ENDPOINT_URL_COS_PROPERTY); if (serviceUrl != null && !serviceUrl.equals(amazonDefaultEndpoint)) { mClient.setEndpoint(serviceUrl); } mClient.setS3ClientOptions(S3ClientOptions.builder().setPathStyleAccess(true).build()); // Set block size property String mBlockSizeString = props.getProperty(BLOCK_SIZE_COS_PROPERTY, "128"); mBlockSize = Long.valueOf(mBlockSizeString).longValue() * 1024 * 1024L; boolean autoCreateBucket = "true" .equalsIgnoreCase((props.getProperty(AUTO_BUCKET_CREATE_COS_PROPERTY, "false"))); partSize = Utils.getLong(conf, FS_COS, FS_ALT_KEYS, MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE); multiPartThreshold = Utils.getLong(conf, FS_COS, FS_ALT_KEYS, MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD); readAhead = Utils.getLong(conf, FS_COS, FS_ALT_KEYS, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE); LOG.debug(READAHEAD_RANGE + ":" + readAhead); inputPolicy = COSInputPolicy .getPolicy(Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, INPUT_FADVISE, INPUT_FADV_NORMAL)); initTransferManager(); maxKeys = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS); flatListingFlag = Utils.getBoolean(conf, FS_COS, FS_ALT_KEYS, FLAT_LISTING, DEFAULT_FLAT_LISTING); if (autoCreateBucket) { try { boolean bucketExist = mClient.doesBucketExist(mBucket); if (bucketExist) { LOG.trace("Bucket {} exists", mBucket); } else { LOG.trace("Bucket {} doesn`t exists and autocreate", mBucket); String mRegion = props.getProperty(REGION_COS_PROPERTY); if (mRegion == null) { mClient.createBucket(mBucket); } else { LOG.trace("Creating bucket {} in region {}", mBucket, mRegion); mClient.createBucket(mBucket, mRegion); } } } catch 
(AmazonServiceException ase) { /* * we ignore the BucketAlreadyExists exception since multiple processes or threads * might try to create the bucket in parallel, therefore it is expected that * some will fail to create the bucket */ if (!ase.getErrorCode().equals("BucketAlreadyExists")) { LOG.error(ase.getMessage()); throw (ase); } } catch (Exception e) { LOG.error(e.getMessage()); throw (e); } } initMultipartUploads(conf); enableMultiObjectsDelete = Utils.getBoolean(conf, FS_COS, FS_ALT_KEYS, ENABLE_MULTI_DELETE, true); blockUploadEnabled = Utils.getBoolean(conf, FS_COS, FS_ALT_KEYS, FAST_UPLOAD, DEFAULT_FAST_UPLOAD); if (blockUploadEnabled) { blockOutputBuffer = Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER); partSize = COSUtils.ensureOutputParameterInRange(MULTIPART_SIZE, partSize); blockFactory = COSDataBlocks.createFactory(this, blockOutputBuffer); blockOutputActiveBlocks = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS); LOG.debug("Using COSBlockOutputStream with buffer = {}; block={};" + " queue limit={}", blockOutputBuffer, partSize, blockOutputActiveBlocks); } else { LOG.debug("Using COSOutputStream"); } } @Override public long getBlockSize() { return mBlockSize; } @Override public String getDataRoot() { return mBucket; } /** * Request object metadata. Used to probe the _SUCCESS object or to identify whether objects * were generated by Stocator * * @param key key * @return the metadata, or null if the request failed */ protected ObjectMetadata getObjectMetadata(String key) { try { ObjectMetadata meta = mClient.getObjectMetadata(mBucket, key); return meta; } catch (AmazonClientException e) { LOG.warn(e.getMessage()); return null; } } @Override public FileStatus getFileStatus(String hostName, Path path, String msg) throws IOException, FileNotFoundException { FileStatus res = null; FileStatus cached = memoryCache.getFileStatus(path.toString()); if (cached != null) { return cached; } LOG.trace("getFileStatus(start) for {}, hostname: {}", path, hostName); /* * The requested path is equal to hostName. HostName is equal to * hostNameScheme, thus the container. Therefore we have no object to look * for and we return the FileStatus as a directory. Containers have no * lastModified timestamp. */ if (path.toString().equals(hostName) || (path.toString().length() + 1 == hostName.length())) { LOG.trace("getFileStatus(completed) {}", path); res = new FileStatus(0L, true, 1, mBlockSize, 0L, path); memoryCache.putFileStatus(path.toString(), res); return res; } if (path.toString().contains(HADOOP_TEMPORARY)) { LOG.debug("getFileStatus on temp object {}. Return not found", path.toString()); throw new FileNotFoundException("Not found " + path.toString()); } String key = pathToKey(hostName, path); LOG.debug("getFileStatus: on original key {}", key); try { FileStatus fileStatus = null; try { fileStatus = getFileStatusKeyBased(key, path); } catch (AmazonS3Exception e) { if (e.getStatusCode() != 404) { throw new IOException(e); } } if (fileStatus != null) { LOG.trace("getFileStatus(completed) {}", path); memoryCache.putFileStatus(path.toString(), fileStatus); return fileStatus; } /* the key was not found; retry the file status call on key/ (this extra call is probably not needed) */ if (!key.endsWith("/")) { String newKey = key + "/"; try { LOG.debug("getFileStatus: original key not found.
Alternative key {}", key); fileStatus = getFileStatusKeyBased(newKey, path); } catch (AmazonS3Exception e) { if (e.getStatusCode() != 404) { throw new IOException(e); } } if (fileStatus != null) { LOG.trace("getFileStatus(completed) {}", path); memoryCache.putFileStatus(path.toString(), fileStatus); return fileStatus; } else { // if here: both key and key/ returned not found. // trying to see if pseudo directory of the form // a/b/key/d/e (a/b/key/ doesn't exists by itself) // perform listing on the key LOG.debug("getFileStatus: Modifined key {} not found. Trying to lisr", key); key = maybeAddTrailingSlash(key); ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(mBucket); request.setPrefix(key); request.setDelimiter("/"); request.setMaxKeys(1); ObjectListing objects = mClient.listObjects(request); if (!objects.getCommonPrefixes().isEmpty() || !objects.getObjectSummaries().isEmpty()) { LOG.trace("getFileStatus(completed) {}", path); res = new FileStatus(0, true, 1, 0, 0, path); memoryCache.putFileStatus(path.toString(), res); return res; } else if (key.isEmpty()) { LOG.debug("Found root directory"); LOG.trace("getFileStatus(completed) {}", path); res = new FileStatus(0, true, 1, 0, 0, path); memoryCache.putFileStatus(path.toString(), res); return res; } } } } catch (AmazonS3Exception e) { if (e.getStatusCode() == 403) { throw new IOException(e); } } catch (Exception e) { LOG.debug("Not found {}", path.toString()); LOG.warn(e.getMessage()); throw new FileNotFoundException("Not found " + path.toString()); } throw new FileNotFoundException("Not found " + path.toString()); } private FileStatus getFileStatusKeyBased(String key, Path path) throws AmazonS3Exception { LOG.trace("internal method - get file status by key {}, path {}", key, path); FileStatus cachedFS = memoryCache.getFileStatus(path.toString()); if (cachedFS != null) { return cachedFS; } ObjectMetadata meta = mClient.getObjectMetadata(mBucket, key); String sparkOrigin = meta.getUserMetaDataOf("data-origin"); boolean stocatorCreated = false; if (sparkOrigin != null) { String tmp = (String) sparkOrigin; if (tmp.equals("stocator")) { stocatorCreated = true; } } mCachedSparkOriginated.put(key, Boolean.valueOf(stocatorCreated)); FileStatus fs = createFileStatus(meta.getContentLength(), key, meta.getLastModified(), path); memoryCache.putFileStatus(path.toString(), fs); return fs; } private FileStatus createFileStatus(S3ObjectSummary objSummary, String hostName, Path path) throws IllegalArgumentException, IOException { String objKey = objSummary.getKey(); String newMergedPath = getMergedPath(hostName, path, objKey); return createFileStatus(objSummary.getSize(), objKey, objSummary.getLastModified(), new Path(newMergedPath)); } private FileStatus createFileStatus(long contentlength, String key, Date lastModified, Path path) { if (objectRepresentsDirectory(key, contentlength)) { LOG.debug("Found exact file: fake directory {}", path.toString()); return new FileStatus(0, true, 1, 0, 0, path); } else { LOG.debug("Found exact file: normal file {}", path.toString()); long fileModificationTime = 0L; if (lastModified != null) { fileModificationTime = lastModified.getTime(); } return new FileStatus(contentlength, false, 1, mBlockSize, fileModificationTime, path); } } /** * Turns a path (relative or otherwise) into an COS key, adding a trailing "/" * if the path is not the root <i>and</i> does not already have a "/" at the * end. 
* * @param key COS key or "" * @return the with a trailing "/", or, if it is the root key, "", */ private String maybeAddTrailingSlash(String key) { if (!key.isEmpty() && !key.endsWith("/")) { return key + '/'; } else { return key; } } /** * Predicate: does the object represent a directory? * * @param name object name * @param size object size * @return true if it meets the criteria for being an object */ public static boolean objectRepresentsDirectory(final String name, final long size) { return !name.isEmpty() && size == 0L; } @Override public boolean exists(String hostName, Path path) throws IOException, FileNotFoundException { LOG.trace("Object exists: {}", path); String objName = path.toString(); if (path.toString().startsWith(hostName)) { objName = path.toString().substring(hostName.length()); } if (objName.contains(HADOOP_TEMPORARY)) { LOG.debug("Exists on temp object {}. Return false", objName); return false; } try { if (getFileStatus(hostName, path, "exists") != null) { return true; } } catch (FileNotFoundException e) { return false; } return false; } @Override public FSDataInputStream getObject(String hostName, Path path) throws IOException { LOG.debug("Opening '{}' for reading.", path); String key = pathToKey(hostName, path); FileStatus fileStatus = memoryCache.getFileStatus(path.toString()); if (fileStatus == null) { fileStatus = getFileStatus(hostName, path, "getObject"); } if (fileStatus.isDirectory()) { throw new FileNotFoundException("Can't open " + path + " because it is a directory"); } COSInputStream inputStream = new COSInputStream(mBucket, key, fileStatus.getLen(), mClient, readAhead, inputPolicy); return new FSDataInputStream(inputStream); } @Override public FSDataOutputStream createObject(String objName, String contentType, Map<String, String> metadata, Statistics statistics) throws IOException { try { String objNameWithoutBuket = objName; if (objName.startsWith(mBucket + "/")) { objNameWithoutBuket = objName.substring(mBucket.length() + 1); } if (blockUploadEnabled) { return new FSDataOutputStream(new COSBlockOutputStream(this, objNameWithoutBuket, new SemaphoredDelegatingExecutor(threadPoolExecutor, blockOutputActiveBlocks, true), partSize, blockFactory, contentType, new WriteOperationHelper(objNameWithoutBuket), metadata), null); } if (!contentType.equals(Constants.APPLICATION_DIRECTORY)) { return new FSDataOutputStream( new COSOutputStream(mBucket, objName, mClient, contentType, metadata, transfers, this), statistics); } else { final InputStream im = new InputStream() { @Override public int read() throws IOException { return -1; } }; final ObjectMetadata om = new ObjectMetadata(); om.setContentLength(0L); om.setContentType(contentType); om.setUserMetadata(metadata); // Remove the bucket name prefix from key path if (objName.startsWith(mBucket + "/")) { objName = objName.substring(mBucket.length() + 1); } /* if (!objName.endsWith("/")) { objName = objName + "/"; }*/ LOG.debug("bucket: {}, key {}", mBucket, objName); PutObjectRequest putObjectRequest = new PutObjectRequest(mBucket, objName, im, om); Upload upload = transfers.upload(putObjectRequest); upload.waitForUploadResult(); OutputStream fakeStream = new OutputStream() { @Override public void write(int b) throws IOException { } @Override public void close() throws IOException { super.close(); } }; return new FSDataOutputStream(fakeStream, statistics); } } catch (InterruptedException e) { throw new InterruptedIOException("Interrupted creating " + objName); } catch (IOException e) { LOG.error(e.getMessage()); 
throw e; } } /** * Create a putObject request. * Sets the supplied metadata on the request * @param key key of object * @param metadata metadata header * @param srcfile source file * @return the request */ public PutObjectRequest newPutObjectRequest(String key, ObjectMetadata metadata, File srcfile) { PutObjectRequest putObjectRequest = new PutObjectRequest(mBucket, key, srcfile); putObjectRequest.setMetadata(metadata); return putObjectRequest; } /** * Create a {@link PutObjectRequest} request. * The metadata is assumed to have been configured with the size of the * operation. * @param key key of object * @param metadata metadata header * @param inputStream source data * @return the request */ private PutObjectRequest newPutObjectRequest(String key, ObjectMetadata metadata, InputStream inputStream) { PutObjectRequest putObjectRequest = new PutObjectRequest(mBucket, key, inputStream, metadata); return putObjectRequest; } public void setStocatorPath(StocatorPath sp) { stocatorPath = sp; } @Override public String getScheme() { return schemaProvided; } @Override public boolean delete(String hostName, Path path, boolean recursive) throws IOException { String obj = path.toString(); if (path.toString().startsWith(hostName)) { obj = path.toString().substring(hostName.length()); } LOG.debug("Object name to delete {}. Path {}", obj, path.toString()); try { mClient.deleteObject(new DeleteObjectRequest(mBucket, obj)); memoryCache.removeFileStatus(path.toString()); return true; } catch (AmazonServiceException e) { if (e.getStatusCode() != 404) { throw new IOException(e); } } LOG.warn("Delete on {} not found. Nothing to delete", path.toString()); return false; } public URI getAccessURI() throws IOException { return filesystemURI; } /** * Set the current working directory for the given file system. All relative * paths will be resolved relative to it. * * @param newDir new directory */ public void setWorkingDirectory(Path newDir) { workingDir = newDir; } public Path getWorkingDirectory() { return workingDir; } /** * {@inheritDoc} * * Prefix based * Return everything that starts with the prefix * Fill listing * Return all objects, even zero size * If fileStatus is null, the path is part of some name, neither an object * nor a pseudo directory.
Was called by Globber * * @param hostName hostName * @param path path * @param fullListing Return all objects, even zero size * @param prefixBased Return everything that starts with the prefix * @return list * @throws IOException if error */ /* public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased) throws IOException { String key = pathToKey(hostName, path); ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>(); ListObjectsRequest request = new ListObjectsRequest().withBucketName(mBucket).withPrefix(key); String curObj; if (path.toString().equals(mBucket)) { curObj = ""; } else if (path.toString().startsWith(mBucket + "/")) { curObj = path.toString().substring(mBucket.length() + 1); } else if (path.toString().startsWith(hostName)) { curObj = path.toString().substring(hostName.length()); } else { curObj = path.toString(); } ObjectListing objectList = mClient.listObjects(request); List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries(); if (objectSummaries.size() == 0) { FileStatus[] emptyRes = {}; LOG.debug("List for bucket {} is empty", mBucket); return emptyRes; } boolean objectScanContinue = true; S3ObjectSummary prevObj = null; while (objectScanContinue) { for (S3ObjectSummary obj : objectSummaries) { if (prevObj == null) { prevObj = obj; continue; } String objKey = obj.getKey(); String unifiedObjectName = extractUnifiedObjectName(objKey); if (!prefixBased && !curObj.equals("") && !path.toString().endsWith("/") && !unifiedObjectName.equals(curObj) && !unifiedObjectName.startsWith(curObj + "/")) { LOG.trace("{} does not match {}. Skipped", unifiedObjectName, curObj); continue; } if (isSparkOrigin(unifiedObjectName) && !fullListing) { LOG.trace("{} created by Spark", unifiedObjectName); if (!isJobSuccessful(unifiedObjectName)) { LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName); if (fModeAutomaticDelete) { delete(hostName, new Path(objKey), true); } continue; } else { // if we here - data created by spark and job completed // successfully // however there be might parts of failed tasks that // were not aborted // we need to make sure there are no failed attempts if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) { // found failed that was not aborted. LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey); if (prevObj.getSize() < obj.getSize()) { LOG.trace("New candidate is {}. 
Removed {}", obj.getKey(), prevObj.getKey()); prevObj = obj; } continue; } } } if (prevObj.getSize() > 0 || fullListing) { FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path); tmpResult.add(fs); } prevObj = obj; } boolean isTruncated = objectList.isTruncated(); if (isTruncated) { objectList = mClient.listNextBatchOfObjects(objectList); objectSummaries = objectList.getObjectSummaries(); } else { objectScanContinue = false; } } if (prevObj != null && (prevObj.getSize() > 0 || fullListing)) { FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path); tmpResult.add(fs); } if (LOG.isTraceEnabled()) { LOG.trace("COS List to return length {}", tmpResult.size()); for (FileStatus fs: tmpResult) { LOG.trace("{}", fs.getPath()); } } return tmpResult.toArray(new FileStatus[tmpResult.size()]); } */ @Override public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws FileNotFoundException, IOException { LOG.debug("Native direct list status for {}", path); ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>(); String key = pathToKey(hostName, path); if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/")) { key = key + "/"; LOG.debug("listNativeDirect modify key to {}", key); } Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>(); ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(mBucket); request.setMaxKeys(5000); request.setPrefix(key); if (!flatListing) { request.setDelimiter("/"); } ObjectListing objectList = mClient.listObjects(request); List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries(); List<String> commonPrefixes = objectList.getCommonPrefixes(); boolean objectScanContinue = true; S3ObjectSummary prevObj = null; // start FTA logic boolean stocatorOrigin = isSparkOrigin(key, path.toString()); if (stocatorOrigin) { LOG.debug("Stocator origin is true for {}", key); if (!isJobSuccessful(key)) { LOG.debug("{} created by failed Spark job. Skipped", key); if (fModeAutomaticDelete) { delete(hostName, new Path(key), true); } return new FileStatus[0]; } } while (objectScanContinue) { for (S3ObjectSummary obj : objectSummaries) { if (prevObj == null) { prevObj = obj; continue; } String objKey = obj.getKey(); String unifiedObjectName = extractUnifiedObjectName(objKey); LOG.debug("list candidate {}, unified name {}", objKey, unifiedObjectName); if (stocatorOrigin && !fullListing) { LOG.trace("{} created by Spark", unifiedObjectName); // if we here - data created by spark and job completed // successfully // however there be might parts of failed tasks that // were not aborted // we need to make sure there are no failed attempts if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) { // found failed that was not aborted. LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey); if (prevObj.getSize() < obj.getSize()) { LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey()); prevObj = obj; } continue; } } FileStatus fs = createFileStatus(prevObj, hostName, path); if (fs.getLen() > 0 || fullListing) { LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen()); if (filter == null) { tmpResult.add(fs); } else if (filter != null && filter.accept(fs.getPath())) { tmpResult.add(fs); } else { LOG.trace("{} rejected by path filter during list. 
Filter {}", fs.getPath(), filter); } } else { emptyObjects.put(fs.getPath().toString(), fs); } prevObj = obj; } boolean isTruncated = objectList.isTruncated(); if (isTruncated) { objectList = mClient.listNextBatchOfObjects(objectList); objectSummaries = objectList.getObjectSummaries(); } else { objectScanContinue = false; } } if (prevObj != null) { FileStatus fs = createFileStatus(prevObj, hostName, path); LOG.debug("Adding the last object from the list {}", fs.getPath()); if (fs.getLen() > 0 || fullListing) { LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen()); if (filter == null) { memoryCache.putFileStatus(fs.getPath().toString(), fs); tmpResult.add(fs); } else if (filter != null && filter.accept(fs.getPath())) { memoryCache.putFileStatus(fs.getPath().toString(), fs); tmpResult.add(fs); } else { LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter); } } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) { emptyObjects.put(fs.getPath().toString(), fs); } } // get common prefixes for (String comPrefix : commonPrefixes) { LOG.debug("Common prefix is {}", comPrefix); if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString()) || emptyObjects.isEmpty()) { FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix)); LOG.debug("Match between common prefix and empty object {}. Adding to result", comPrefix); if (filter == null) { memoryCache.putFileStatus(status.getPath().toString(), status); tmpResult.add(status); } else if (filter != null && filter.accept(status.getPath())) { memoryCache.putFileStatus(status.getPath().toString(), status); tmpResult.add(status); } else { LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(), filter); } } } return tmpResult.toArray(new FileStatus[tmpResult.size()]); } /** * Merge between two paths * * @param hostName * @param p path * @param objectKey * @return merged path */ private String getMergedPath(String hostName, Path p, String objectKey) { if ((p.getParent() != null) && (p.getName() != null) && (p.getParent().toString().equals(hostName))) { if (objectKey.equals(p.getName())) { return p.toString(); } return hostName + objectKey; } return hostName + objectKey; } /** * Turns a path (relative or otherwise) into an COS key * * @host hostName host of the object * @param path object full path */ private String pathToKey(String hostName, Path path) { if (!path.isAbsolute()) { String pathStr = path.toUri().getPath(); if (pathStr.startsWith(mBucket) && !pathStr.equals(mBucket)) { path = new Path(pathStr.substring(mBucket.length() + 1)); } path = new Path(hostName, path); } if (path.toUri().getScheme() != null && path.toUri().getPath().isEmpty()) { return ""; } return path.toUri().getPath().substring(1); } public Path keyToQualifiedPath(String hostName, String key) { return new Path(hostName, key); } /** * Checks if container/object contains container/object/_SUCCESS If so, this * object was created by successful Hadoop job * * @param objectKey * @return boolean if job is successful */ private boolean isJobSuccessful(String objectKey) { LOG.trace("isJobSuccessful: for {}", objectKey); if (mCachedSparkJobsStatus.containsKey(objectKey)) { LOG.debug("isJobSuccessful: {} found cached", objectKey); return mCachedSparkJobsStatus.get(objectKey).booleanValue(); } String key = getRealKey(objectKey); Path p = new Path(key, HADOOP_SUCCESS); ObjectMetadata statusMetadata = getObjectMetadata(p.toString()); Boolean isJobOK = 
Boolean.FALSE; if (statusMetadata != null) { isJobOK = Boolean.TRUE; } LOG.debug("isJobSuccessful: not cached {}. Status is {}", objectKey, isJobOK); mCachedSparkJobsStatus.put(objectKey, isJobOK); return isJobOK.booleanValue(); } /** * Accepts any object name. If object name of the form * a/b/c/gil.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c * .csv-attempt_20160317132wrong_0000_m_000000_1 Then a/b/c/gil.data is * returned. Code testing that attempt_20160317132wrong_0000_m_000000_1 is * valid task id identifier * * @param objectKey * @return unified object name */ private String extractUnifiedObjectName(String objectKey) { return extractFromObjectKeyWithTaskID(objectKey, true); } /** * Accepts any object name. If object name is of the form * a/b/c/m.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c * .csv-attempt_20160317132wrong_0000_m_000000_1 Then * a/b/c/m.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c.csv is * returned. Perform test that attempt_20160317132wrong_0000_m_000000_1 is * valid task id identifier * * @param objectName * @return unified object name */ private String nameWithoutTaskID(String objectKey) { return extractFromObjectKeyWithTaskID(objectKey, false); } /** * Extracts from the object key an unified object name or name without task ID * * @param objectKey * @param isUnifiedObjectKey * @return */ private String extractFromObjectKeyWithTaskID(String objectKey, boolean isUnifiedObjectKey) { Path p = new Path(objectKey); int index = objectKey.indexOf("-" + HADOOP_ATTEMPT); if (index > 0) { String attempt = objectKey.substring(objectKey.lastIndexOf("-") + 1); try { TaskAttemptID.forName(attempt); if (isUnifiedObjectKey) { return p.getParent().toString(); } else { return objectKey.substring(0, index); } } catch (IllegalArgumentException e) { return objectKey; } } else if (isUnifiedObjectKey && objectKey.indexOf(HADOOP_SUCCESS) > 0) { return p.getParent().toString(); } return objectKey; } /** * Checks if container/object exists and verifies that it contains * Data-Origin=stocator metadata If so, object was created by Spark. * * @param objectKey the key of the object * @param path the object path * @return boolean if object was created by Spark */ private boolean isSparkOrigin(String objectKey, String path) { LOG.debug("check spark origin for {}", objectKey); if (!objectKey.endsWith("/")) { LOG.debug("Key {} has no slash. Return false", objectKey); return false; } else { objectKey = objectKey.substring(0, objectKey.length() - 1); } if (mCachedSparkOriginated.containsKey(objectKey)) { boolean res = mCachedSparkOriginated.get(objectKey).booleanValue(); LOG.debug("found cached for spark origin for {}. 
Status {}", objectKey, res); return res; } String key = getRealKey(objectKey); Boolean sparkOriginated = Boolean.FALSE; ObjectMetadata objMetadata = getObjectMetadata(key); if (objMetadata != null) { Object sparkOrigin = objMetadata.getUserMetaDataOf("data-origin"); if (sparkOrigin != null) { String tmp = (String) sparkOrigin; if (tmp.equals("stocator")) { sparkOriginated = Boolean.TRUE; } } } mCachedSparkOriginated.put(key, sparkOriginated); LOG.debug("spark origin for {} is {} non cached", objectKey, sparkOriginated.booleanValue()); return sparkOriginated.booleanValue(); } private String getRealKey(String objectKey) { String key = objectKey; if (objectKey.toString().startsWith(mBucket + "/") && !objectKey.toString().equals(mBucket + "/")) { key = objectKey.substring(mBucket.length() + 1); } return key; } private void initMultipartUploads(Configuration conf) throws IOException { boolean purgeExistingMultipart = Utils.getBoolean(conf, FS_COS, FS_ALT_KEYS, PURGE_EXISTING_MULTIPART, DEFAULT_PURGE_EXISTING_MULTIPART); long purgeExistingMultipartAge = Utils.getLong(conf, FS_COS, FS_ALT_KEYS, PURGE_EXISTING_MULTIPART_AGE, DEFAULT_PURGE_EXISTING_MULTIPART_AGE); if (purgeExistingMultipart) { Date purgeBefore = new Date(new Date().getTime() - purgeExistingMultipartAge * 1000); try { transfers.abortMultipartUploads(mBucket, purgeBefore); } catch (AmazonServiceException e) { if (e.getStatusCode() == 403) { LOG.debug("Failed to purging multipart uploads against {}," + " FS may be read only", mBucket, e); } else { throw translateException("purging multipart uploads", mBucket, e); } } } } /** * Initializes connection management * * @param conf Hadoop configuration * @param clientConf client SDK configuration */ private void initConnectionSettings(Configuration conf, ClientConfiguration clientConf) throws IOException { clientConf.setMaxConnections( Utils.getInt(conf, FS_COS, FS_ALT_KEYS, MAXIMUM_CONNECTIONS, DEFAULT_MAXIMUM_CONNECTIONS)); clientConf.setClientExecutionTimeout( Utils.getInt(conf, FS_COS, FS_ALT_KEYS, CLIENT_EXEC_TIMEOUT, DEFAULT_CLIENT_EXEC_TIMEOUT)); clientConf.setMaxErrorRetry( Utils.getInt(conf, FS_COS, FS_ALT_KEYS, MAX_ERROR_RETRIES, DEFAULT_MAX_ERROR_RETRIES)); clientConf.setConnectionTimeout( Utils.getInt(conf, FS_COS, FS_ALT_KEYS, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT)); clientConf .setSocketTimeout(Utils.getInt(conf, FS_COS, FS_ALT_KEYS, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT)); clientConf.setRequestTimeout( Utils.getInt(conf, FS_COS, FS_ALT_KEYS, REQUEST_TIMEOUT, DEFAULT_REQUEST_TIMEOUT)); int sockSendBuffer = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, SOCKET_SEND_BUFFER, DEFAULT_SOCKET_SEND_BUFFER); int sockRecvBuffer = Utils.getInt(conf, FS_COS, FS_ALT_KEYS, SOCKET_RECV_BUFFER, DEFAULT_SOCKET_RECV_BUFFER); clientConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer); String signerOverride = Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, SIGNING_ALGORITHM, ""); if (!signerOverride.isEmpty()) { LOG.debug("Signer override = {}", signerOverride); clientConf.setSignerOverride(signerOverride); } String userAgentPrefix = Utils.getTrimmed(conf, FS_COS, FS_ALT_KEYS, USER_AGENT_PREFIX, DEFAULT_USER_AGENT_PREFIX); String userAgentName = singletoneInitTimeData.getUserAgentName(); if (!userAgentPrefix.equals(DEFAULT_USER_AGENT_PREFIX)) { userAgentName = userAgentPrefix + " " + userAgentName; } clientConf.setUserAgentPrefix(userAgentName); } @Override public boolean rename(String hostName, String srcPath, String dstPath) throws IOException { // Not yet implemented return false; } 
private void initTransferManager() { TransferManagerConfiguration transferConfiguration = new TransferManagerConfiguration(); transferConfiguration.setMinimumUploadPartSize(partSize); transferConfiguration.setMultipartUploadThreshold(multiPartThreshold); transferConfiguration.setMultipartCopyPartSize(partSize); transferConfiguration.setMultipartCopyThreshold(multiPartThreshold); transfers = new TransferManager(mClient, unboundedThreadPool); transfers.setConfiguration(transferConfiguration); } private synchronized File createTmpDirForWrite(String pathStr, String tmpDirName) throws IOException { LOG.trace("tmpDirName is {}", tmpDirName); if (directoryAllocator == null) { String bufferDir = "hadoop.tmp.dir"; LOG.trace("Local buffer directorykey is {}", bufferDir); directoryAllocator = new COSLocalDirAllocator(conf, bufferDir); } return directoryAllocator.createTmpFileForWrite(pathStr, COSLocalDirAllocator.SIZE_UNKNOWN, conf); } File createTmpFileForWrite(String pathStr) throws IOException { String tmpDirName = conf.get("hadoop.tmp.dir") + "/stocator"; File tmpDir = createTmpDirForWrite(pathStr, tmpDirName); return tmpDir; } /** * Upload part of a multi-partition file. * <i>Important: this call does not close any input stream in the request.</i> * @param request request * @return the result of the operation * @throws AmazonClientException on problems */ public UploadPartResult uploadPart(UploadPartRequest request) throws AmazonClientException { try { UploadPartResult uploadPartResult = mClient.uploadPart(request); return uploadPartResult; } catch (AmazonClientException e) { throw e; } } final class WriteOperationHelper { private final String key; private WriteOperationHelper(String keyT) { key = keyT; } /** * Create a {@link PutObjectRequest} request. * If {@code length} is set, the metadata is configured with the size of * the upload. * @param inputStream source data * @param length size, if known. Use -1 for not known * @return the request */ PutObjectRequest newPutRequest(InputStream inputStream, long length) { PutObjectRequest request = newPutObjectRequest(key, newObjectMetadata(length), inputStream); return request; } /** * Create a {@link PutObjectRequest} request to upload a file. * @param sourceFile source file * @return the request */ PutObjectRequest newPutRequest(File sourceFile) { int length = (int) sourceFile.length(); PutObjectRequest request = newPutObjectRequest(key, newObjectMetadata(length), sourceFile); return request; } /** * Callback on a successful write. */ void writeSuccessful() { LOG.debug("successful write"); } /** * A helper method to delete a list of keys on a s3-backend. * * @param keysToDelete collection of keys to delete on the s3-backend * if empty, no request is made of the object store. 
* @param clearKeys clears the keysToDelete-list after processing the list * when set to true * @param deleteFakeDir indicates whether this is for deleting fake dirs * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory */ private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete, boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException, InvalidRequestException { if (keysToDelete.isEmpty()) { // exit fast if there are no keys to delete return; } for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { blockRootDelete(keyVersion.getKey()); } if (enableMultiObjectsDelete) { mClient.deleteObjects(new DeleteObjectsRequest(mBucket).withKeys(keysToDelete)); } else { for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { String key = keyVersion.getKey(); blockRootDelete(key); mClient.deleteObject(mBucket, key); } } if (clearKeys) { keysToDelete.clear(); } } /** * Reject any request to delete an object where the key is root. * @param key key to validate * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory */ private void blockRootDelete(String key) throws InvalidRequestException { if (key.isEmpty() || "/".equals(key)) { throw new InvalidRequestException("Bucket " + mBucket + " cannot be deleted"); } } /** * Callback on a write failure. * @param e Any exception raised which triggered the failure */ void writeFailed(Exception e) { LOG.debug("Write to {} failed", this, e); } /** * Create a new object metadata instance. * Any standard metadata headers are added here, for example: * encryption * @param length size, if known. Use -1 for not known * @return a new metadata instance */ public ObjectMetadata newObjectMetadata(long length) { final ObjectMetadata om = new ObjectMetadata(); if (length >= 0) { om.setContentLength(length); } return om; } /** * Start the multipart upload process. * @return the upload result containing the ID * @throws IOException IO problem */ String initiateMultiPartUpload() throws IOException { LOG.debug("Initiating Multipart upload"); final InitiateMultipartUploadRequest initiateMPURequest = new InitiateMultipartUploadRequest(mBucket, key, newObjectMetadata(-1)); try { return mClient.initiateMultipartUpload(initiateMPURequest).getUploadId(); } catch (AmazonClientException ace) { throw translateException("initiate MultiPartUpload", key, ace); } } /** * Complete a multipart upload operation. * @param uploadId multipart operation Id * @param partETags list of partial uploads * @return the result * @throws AmazonClientException on problems */ CompleteMultipartUploadResult completeMultipartUpload(String uploadId, List<PartETag> partETags) throws AmazonClientException { LOG.debug("Completing multipart upload {} with {} parts", uploadId, partETags.size()); return mClient .completeMultipartUpload(new CompleteMultipartUploadRequest(mBucket, key, uploadId, partETags)); } /** * Abort a multipart upload operation * @param uploadId multipart operation Id * @throws AmazonClientException on problems */ void abortMultipartUpload(String uploadId) throws AmazonClientException { LOG.debug("Aborting multipart upload {}", uploadId); mClient.abortMultipartUpload(new AbortMultipartUploadRequest(mBucket, key, uploadId)); } /** * Create and initialize a part request of a multipart upload. * Exactly one of: {@code uploadStream} or {@code sourceFile} * must be specified. 
* @param uploadId ID of ongoing upload * @param partNumber current part number of the upload * @param size amount of data * @param uploadStream source of data to upload * @param sourceFile optional source file * @return the request */ UploadPartRequest newUploadPartRequest(String uploadId, int partNumber, int size, InputStream uploadStream, File sourceFile) { Preconditions.checkNotNull(uploadId); // exactly one source must be set; xor verifies this Preconditions.checkArgument((uploadStream != null) ^ (sourceFile != null), "Data source"); Preconditions.checkArgument(size > 0, "Invalid partition size %s", size); Preconditions.checkArgument(partNumber > 0 && partNumber <= 10000, "partNumber must be between 1 and 10000 inclusive, but is %s", partNumber); LOG.debug("Creating part upload request for {} #{} size {}", uploadId, partNumber, size); UploadPartRequest request = new UploadPartRequest().withBucketName(mBucket).withKey(key) .withUploadId(uploadId).withPartNumber(partNumber).withPartSize(size); if (uploadStream != null) { // there's an upload stream. Bind to it. request.setInputStream(uploadStream); } else { request.setFile(sourceFile); } return request; } /** * The toString method is intended to be used in logging/toString calls. * @return a string description */ @Override public String toString() { final StringBuilder sb = new StringBuilder("{bucket=").append(mBucket); sb.append(", key='").append(key).append('\''); sb.append('}'); return sb.toString(); } /** * PUT an object directly (i.e. not via the transfer manager). * @param putObjectRequest the request * @return the upload initiated * @throws IOException on problems */ PutObjectResult putObject(PutObjectRequest putObjectRequest) throws IOException { try { PutObjectResult result = mClient.putObject(putObjectRequest); return result; } catch (AmazonClientException e) { throw translateException("put", putObjectRequest.getKey(), e); } } } private String getFirstName(String p) { if (p.startsWith("/")) { p = p.substring(p.indexOf("/")); } if (p.indexOf("/") > 0) { return p.substring(0, p.indexOf("/")); } return p; } /** * Initiate a {@code listObjects} operation, incrementing metrics * in the process. * @param request request to initiate * @return the results */ protected ObjectListing listObjects(ListObjectsRequest request) { return mClient.listObjects(request); } /** * List the next set of objects. * @param objects paged result * @return the next result object */ protected ObjectListing continueListObjects(ObjectListing objects) { return mClient.listNextBatchOfObjects(objects); } /** * Get the maximum key count. * @return a value, valid after initialization */ int getMaxKeys() { return maxKeys; } /** * Build a {@link LocatedFileStatus} from a {@link FileStatus} instance. * @param status file status * @return a located status with block locations set up from this FS * @throws IOException IO Problems */ LocatedFileStatus toLocatedFileStatus(FileStatus status) throws IOException { return new LocatedFileStatus(status, null); } @Override public boolean isFlatListing() { return flatListingFlag; } }
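The class above is the complete client. As a quick orientation, here is a minimal, hypothetical sketch of driving it directly through the methods defined in this file (constructor, initiate, createObject, getFileStatus, getObject). The configuration property names, the service name "myservice", the bucket, and the endpoint URL below are placeholders: the real keys are resolved by ConfigurationHandler.initialize, which is not part of this listing, so check the Stocator configuration reference before running anything like this. In a real deployment the client is normally reached through Hadoop's FileSystem layer rather than instantiated by hand; the direct calls here only mirror this file's API.

// Hypothetical usage sketch. Property keys, service name, endpoint and bucket are
// placeholders; the actual keys are resolved by ConfigurationHandler.initialize (not shown here).
import java.net.URI;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

import com.ibm.stocator.fs.cos.COSAPIClient;

public class COSAPIClientSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder property names; substitute the keys your Stocator build actually reads.
    conf.set("fs.cos.myservice.endpoint", "https://s3.example.objectstorage.example.net");
    conf.set("fs.cos.myservice.access.key", "ACCESS_KEY");
    conf.set("fs.cos.myservice.secret.key", "SECRET_KEY");

    // cos://<bucket>.<service>/ is the usual Stocator URI shape; treat it as an assumption here.
    String hostName = "cos://my-bucket.myservice/";
    COSAPIClient client = new COSAPIClient(new URI(hostName), conf);
    client.initiate("cos"); // resolves credentials and builds the AmazonS3 client

    // Write a small object; createObject strips the leading bucket prefix from the name.
    try (FSDataOutputStream out = client.createObject("my-bucket/data/part-00000.csv",
        "application/octet-stream", new HashMap<String, String>(), null)) {
      out.write("hello,cos\n".getBytes("UTF-8"));
    }

    // Stat the object and read it back through the COSInputStream wrapper.
    Path object = new Path(hostName + "data/part-00000.csv");
    FileStatus status = client.getFileStatus(hostName, object, "sketch");
    try (FSDataInputStream in = client.getObject(hostName, object)) {
      byte[] buf = new byte[(int) status.getLen()];
      in.readFully(buf);
      System.out.println(new String(buf, "UTF-8"));
    }
  }
}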