List of usage examples for com.amazonaws.services.s3 AmazonS3 getObject
ObjectMetadata getObject(GetObjectRequest getObjectRequest, File destinationFile)
throws SdkClientException, AmazonServiceException;
Gets the object metadata for the object stored in Amazon S3 under the specified bucket and key, and saves the object contents to the specified file.
From source file:org.deeplearning4j.aws.s3.reader.S3Downloader.java
License:Apache License
/**
 * Streams the contents of the S3 object identified by {@code bucket}/{@code key}
 * into the supplied output stream.
 *
 * @param bucket name of the S3 bucket to read from
 * @param key    key of the object within the bucket
 * @param to     destination stream; it is wrapped in a buffer and closed when
 *               the copy completes (same as the original behavior)
 * @throws IOException if fetching or copying the object fails
 */
public void download(String bucket, String key, OutputStream to) throws IOException {
    AmazonS3 s3 = getClient();
    // try-with-resources guarantees the S3 connection and both streams are
    // released even when the copy fails part-way; the original closed them
    // only on the success path and leaked them on any exception.
    try (S3Object obj = s3.getObject(bucket, key);
            InputStream is = obj.getObjectContent();
            BufferedOutputStream bos = new BufferedOutputStream(to)) {
        IOUtils.copy(is, bos);
    }
}
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
/**
 * Kettle job entry: stages a user-supplied jar in an S3 bucket and runs it as
 * an Amazon Elastic MapReduce job flow — either creating a new flow or adding
 * a step to an existing one — optionally blocking until completion while
 * logging progress.  On a FAILED outcome the step's stdout/stderr are fetched
 * from the staging bucket and logged as errors.
 *
 * @param result the Kettle result object updated with the outcome
 * @param arg1   unused (required by the job-entry interface)
 * @return the same {@code result} instance, with stop/error flags set on failure
 * @throws KettleException declared by the interface; in practice all errors are
 *         caught and recorded on {@code result}
 */
public Result execute(Result result, int arg1) throws KettleException {
    // Set up a dedicated log-file appender for this job entry.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }
    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();
        // find main class in jar
        String mainClass = getMainClass(localJarUrl);
        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);
        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);
        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }
        // delete old jar if needed (best-effort: failure is logged, not fatal)
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }
        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;
        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            // An existing job flow was given: add our jar as a new step to it.
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                // Whitespace-tokenize the user's command-line args for the step.
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }
            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);
            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);
            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);
            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);
            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }
        // Polling interval (seconds) for status logging; defaults to 60.
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }
        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING";
                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }
                    // Poll the job flow state until it is no longer "running".
                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);
                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }
                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent,
                        // mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }
                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);
                        // Fetch the failed step's stdout/stderr from S3 and log them.
                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());
                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }
    // Detach the per-entry log appender and register its file as a result file.
    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();
        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }
    return result;
}
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/**
 * Executes a Hive job on the AWS Elastic MapReduce service.
 *
 * <p>Stages the Hive script (.q file) in S3 if it is not already there, then
 * submits a "run hive script" step (via AWS's public script-runner.jar) to a
 * new or existing EMR job flow.  When {@code blocking} is set, polls the job
 * flow state, optionally terminating it if the parent job is stopped, and on
 * FAILED fetches and logs the step's stdout/stderr from the staging bucket.
 *
 * @param result the Kettle result object updated with the outcome
 * @param arg1   unused (required by the job-entry interface)
 * @return the same {@code result} instance, with stop/error flags set on failure
 * @throws KettleException declared by the interface; in practice all errors are
 *         caught and recorded on {@code result}
 */
public Result execute(Result result, int arg1) throws KettleException {
    // Setup a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }
    try {
        // Create and connect an AWS service.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);
        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$
        // Prepare staging S3 URL for Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$
            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                // Strip embedded credentials ("...@s3/") from the VFS-style URL.
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }
        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }
            // pull down .q file from VSF
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
            IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut);
            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }
            // delete the previous .q file in S3 (best-effort; failure only logged)
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }
            // Put .q file in S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }
        // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for Hive EMR
        // job.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$
        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create an EMR job flow, start a step to setup Hive and get the job flow ID.
            runJobFlowRequest = createJobFlow();
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }
        // Now EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);
        // Configure a HadoopJarStep.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);
        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        // Set a logging interval (seconds); defaults to 10.
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }
        // monitor and log if intended.
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING"; //$NON-NLS-1$
                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);
                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }
                        if (!found) {
                            break;
                        }
                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);
                        if (parentJob.isStopped()) {
                            // Parent job stopped: unless "alive" is requested,
                            // terminate the EMR job flow before bailing out.
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            logError(Const.getStackTracker(ie));
                        }
                    }
                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);
                        // Fetch the failed step's stdout/stderr from S3 and log them.
                        S3Object outObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());
                        S3Object errorObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }
    // Detach the per-entry log appender and register its file as a result file.
    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();
        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }
    return result;
}
From source file:org.systemsbiology.athero.SimpleStoreActivitiesS3Impl.java
License:Open Source License
/** * /* w ww . jav a 2s. c o m*/ * @param bucketName * Name of S3 bucket * @param remoteName * Key to use for uploaded S3 object * @param localName * Name of the file locally * @param toBox * This is an output parameter here. * Used to communicate the name of the box that runs download activity * @return * A Value object * @throws IOException */ private String downloadFileFromS3(String bucketName, String remoteName, String localName) throws IOException { System.out.println("downloadFileFromS3 begin remoteName=" + remoteName + ", localName=" + localName); AmazonS3 storage = getS3Client(); try { FileOutputStream f = new FileOutputStream(localName); try { S3Object obj = storage.getObject(bucketName, remoteName); InputStream inputStream = obj.getObjectContent(); long totalSize = obj.getObjectMetadata().getContentLength(); try { long totalRead = 0; int read = 0; byte[] bytes = new byte[1024]; long lastHeartbeatTime = System.currentTimeMillis(); while ((read = inputStream.read(bytes)) != -1) { totalRead += read; f.write(bytes, 0, read); int progress = (int) (totalRead / totalSize * 100); lastHeartbeatTime = heartbeat(lastHeartbeatTime, progress); } } finally { inputStream.close(); } } finally { f.close(); } } catch (AmazonServiceException e) { String message = "Failure downloading from S3"; System.out.println(message); throw e; } catch (AmazonClientException e) { String message = "Failure downloading from S3"; System.out.println(message); throw e; } catch (IOException e) { String message = "Failure downloading from S3"; System.out.println(message); throw e; } // Return hostname file was downloaded to System.out.println("downloadFileFromS3 done"); return hostSpecificTaskList; //todo: remove after testing }
From source file:pagerank.S3Wrapper.java
License:Open Source License
/**
 * Demo entry point: loads the [default] AWS credential profile, lists the
 * account's buckets, lists the objects in the hard-coded bucket "pages4.27",
 * and downloads one test object.  Several sample operations (create bucket,
 * upload, download-and-display, delete) are left commented out.
 */
public static void main(String[] args) throws IOException {
    /*
     * The ProfileCredentialsProvider will return your [default]
     * credential profile by reading from the credentials file located at
     * (/home/yupenglu/.aws/credentials).
     */
    AWSCredentials credentials = null;
    try {
        credentials = new ProfileCredentialsProvider("default").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException("Cannot load the credentials from the credential profiles file. "
                + "Please make sure that your credentials file is at the correct "
                + "location (/home/yupenglu/.aws/credentials), and is in valid format.", e);
    }
    AmazonS3 s3 = new AmazonS3Client(credentials);
    // Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    // s3.setRegion(usWest2);
    // String bucketName = "my-first-s3-bucket-" + UUID.randomUUID();
    String bucketName = "pages4.27";
    String key = "NewKey";
    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");
    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        // System.out.println("Creating bucket " + bucketName + "\n");
        // s3.createBucket(bucketName);

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        // System.out.println("Uploading a new object to S3 from a file\n");
        // s3.putObject(new PutObjectRequest(bucketName, key, createSampleFile()));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possibly since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        // System.out.println("Downloading an object");
        // S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        // System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        // displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        // ObjectListing objectListing = s3.listObjects(new ListObjectsRequest()
        //         .withBucketName(bucketName)
        //         .withPrefix("My"));
        ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(bucketName));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(" - " + URLDecoder.decode(objectSummary.getKey(), "UTF-8") + " " + "(size = "
                    + objectSummary.getSize() + ")");
        }
        S3Object testObj = s3.getObject(bucketName,
                URLEncoder.encode("http://finance.yahoo.com/investing-news/", "UTF-8"));
        // NOTE(review): this stream is read nowhere and never closed, which
        // keeps the underlying HTTP connection open — consider closing it.
        S3ObjectInputStream inputStream = testObj.getObjectContent();
        // System.out.println(streamToString(inputStream));
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        // System.out.println("Deleting an object\n");
        // s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        // System.out.println("Deleting bucket " + bucketName + "\n");
        // s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file:pl.worker.Main.java
/**
 * Fetches an image from the fixed S3 bucket {@code lab4-weeia} under the
 * {@code agnieszka.leszczynska/} prefix and decodes it.
 *
 * @param fileName object name relative to the hard-coded key prefix
 * @return the decoded image, or {@code null} if no registered ImageReader
 *         can decode the stream (per {@link ImageIO#read})
 * @throws IOException if the download or decode fails
 */
public static BufferedImage getFile(String fileName) throws IOException {
    AmazonS3 s3Client = new AmazonS3Client();
    S3Object s3Object = s3Client.getObject("lab4-weeia", "agnieszka.leszczynska/" + fileName);
    // Close the content stream when done: the original never closed it, which
    // leaks the underlying HTTP connection back to the S3 client's pool.
    try (InputStream in = s3Object.getObjectContent()) {
        return ImageIO.read(in);
    }
}
From source file:s3copy.SimpleCopier.java
License:Apache License
void run(String[] args) throws IOException { SCCmdLine cmdline = new SCCmdLine(); SCCmdLine.Settings settings = cmdline.parse(args); String[] components = settings.input.split("/"); String bucket = components[0]; StringBuilder sb = new StringBuilder(); for (int i = 1; i < components.length; i++) { sb.append(components[i]);/*from ww w . j a v a 2 s . c om*/ sb.append("/"); } sb.deleteCharAt(sb.length() - 1); AmazonS3 s3client = new AmazonS3Client( new BasicAWSCredentials(settings.access_key_id, settings.secret_access_key)); System.out.println("bucket: " + bucket); System.out.println("value: " + sb.toString()); S3Object obj = s3client.getObject(bucket, sb.toString()); InputStream is = obj.getObjectContent(); OutputStream out = null; if (!settings.isURI()) { //local copy out = new FileOutputStream(settings.output); } else { Configuration conf = new Configuration(); if (settings.conf != null) { File _conf = new File(settings.conf); if (_conf.exists()) { if (_conf.isDirectory()) { conf.addResource(new Path(new File(settings.conf, "core-site.xml").getAbsolutePath())); } else { conf.addResource(new Path(settings.conf)); } } } FileSystem fs = FileSystem.get(conf); out = fs.create(new Path(settings.output)); } IOUtils.copyLarge(is, out); out.close(); }
From source file:thinkbig.util.Util.java
License:Open Source License
/** * Check if all the S3 Uris exist/*from w w w. java 2s . c o m*/ * @param s3 * @param s3Uris * @return true, if all Uris in the list exist */ public static boolean checkIfUrisExist(AmazonS3 s3, List<String> s3Uris) { // check if the client is valid if (s3 == null) { System.out.println("Not a valide S3 Client"); return false; } boolean found = true; for (String s3Uri : s3Uris) { Matcher matcher = s3UriPattern.matcher(s3Uri); if (matcher.find()) { String bucket = matcher.group(1); String key = matcher.group(2); found &= s3.doesBucketExist(bucket) & (s3.getObject(bucket, key) != null); if (!found) { return false; } } else { return false; } } return true; }