List of usage examples for com.amazonaws.services.s3 AmazonS3 doesBucketExist
@Deprecated public boolean doesBucketExist(String bucketName) throws SdkClientException, AmazonServiceException;
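doesBucketExist is deprecated in recent 1.x versions of the SDK in favor of doesBucketExistV2, which surfaces credential and permission problems instead of quietly returning true. A minimal sketch of the check-then-create pattern against the newer method; the region and bucket name here are placeholder assumptions, not values from the examples below:

import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;

public class BucketExistsExample {
  public static void main(String[] args) {
    AmazonS3 s3 = AmazonS3ClientBuilder.standard()
        .withRegion(Regions.US_EAST_1)   // placeholder region
        .build();

    String bucketName = "my-staging-bucket"; // placeholder name

    // doesBucketExistV2 replaces the deprecated doesBucketExist in newer 1.x SDKs
    if (!s3.doesBucketExistV2(bucketName)) {
      s3.createBucket(bucketName);
    }
  }
}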
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
      appender = LogWriter.createFileAppender(logFileName, true, false);
      LogWriter.getInstance().addAppender(appender);
      log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
          logFileName, e.toString()));
      logError(Const.getStackTracker(e));
    }

    try {
      // create/connect aws service
      AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

      // pull down jar from vfs
      FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
      File tmpFile = File.createTempFile("customEMR", "jar");
      tmpFile.deleteOnExit();
      FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
      IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
      URL localJarUrl = tmpFile.toURI().toURL();

      // find main class in jar
      String mainClass = getMainClass(localJarUrl);

      // create staging bucket
      AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

      FileSystemOptions opts = new FileSystemOptions();
      DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
          null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
      FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

      String stagingBucketName = stagingDirFileObject.getName().getBaseName();
      if (!s3Client.doesBucketExist(stagingBucketName)) {
        s3Client.createBucket(stagingBucketName);
      }

      // delete old jar if needed
      try {
        s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
      } catch (Exception ex) {
        logError(Const.getStackTracker(ex));
      }

      // put jar in s3 staging bucket
      s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));

      // create non-vfs s3 url to jar
      String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
      String stagingS3BucketUrl = "s3://" + stagingBucketName;

      RunJobFlowRequest runJobFlowRequest = null;
      RunJobFlowResult runJobFlowResult = null;
      if (StringUtil.isEmpty(hadoopJobFlowId)) {
        // create EMR job flow
        runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
        // start EMR job
        runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
      } else {
        List<String> jarStepArgs = new ArrayList<String>();
        if (!StringUtil.isEmpty(cmdLineArgs)) {
          StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
          while (st.hasMoreTokens()) {
            String token = st.nextToken();
            logBasic("adding args: " + token);
            jarStepArgs.add(token);
          }
        }

        HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
        hadoopJarStep.setJar(stagingS3JarUrl);
        hadoopJarStep.setMainClass(mainClass);
        hadoopJarStep.setArgs(jarStepArgs);

        StepConfig stepConfig = new StepConfig();
        stepConfig.setName("custom jar: " + jarUrl);
        stepConfig.setHadoopJarStep(hadoopJarStep);

        List<StepConfig> steps = new ArrayList<StepConfig>();
        steps.add(stepConfig);

        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);

        emrClient.addJobFlowSteps(addJobFlowStepsRequest);
      }

      String loggingIntervalS = environmentSubstitute(loggingInterval);
      int logIntv = 60;
      try {
        logIntv = Integer.parseInt(loggingIntervalS);
      } catch (NumberFormatException ex) {
        logError("Unable to parse logging interval '" + loggingIntervalS + "' - using default of 60");
      }

      // monitor it / blocking / logging if desired
      if (blocking) {
        try {
          if (log.isBasic()) {
            String executionState = "RUNNING";

            List<String> jobFlowIds = new ArrayList<String>();
            String id = hadoopJobFlowId;
            if (StringUtil.isEmpty(hadoopJobFlowId)) {
              id = runJobFlowResult.getJobFlowId();
              jobFlowIds.add(id);
            }

            while (isRunning(executionState)) {
              DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
              describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

              DescribeJobFlowsResult describeJobFlowsResult = emrClient
                  .describeJobFlows(describeJobFlowsRequest);

              boolean found = false;
              for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                if (jobFlowDetail.getJobFlowId().equals(id)) {
                  executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                  found = true;
                }
              }

              if (!found) {
                break;
              }
              // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent,
              // mapPercent, reducePercent));
              logBasic(hadoopJobName + " execution status: " + executionState);
              try {
                if (isRunning(executionState)) {
                  Thread.sleep(logIntv * 1000);
                }
              } catch (InterruptedException ie) {
                // Ignore
              }
            }

            if ("FAILED".equalsIgnoreCase(executionState)) {
              result.setStopped(true);
              result.setNrErrors(1);
              result.setResult(false);

              S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
              ByteArrayOutputStream outStream = new ByteArrayOutputStream();
              IOUtils.copy(outObject.getObjectContent(), outStream);
              logError(outStream.toString());

              S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
              ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
              IOUtils.copy(errorObject.getObjectContent(), errorStream);
              logError(errorStream.toString());
            }
          }
        } catch (Exception e) {
          logError(e.getMessage(), e);
        }
      }
    } catch (Throwable t) {
      t.printStackTrace();
      result.setStopped(true);
      result.setNrErrors(1);
      result.setResult(false);
      logError(t.getMessage(), t);
    }

    if (appender != null) {
      LogWriter.getInstance().removeAppender(appender);
      appender.close();

      ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
          parentJob.getJobname(), getName());
      result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
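Stripped of the Kettle plumbing, the staging logic above is a small reusable pattern: make sure the bucket exists, clear any stale copy of the artifact, then upload the new one. A hedged sketch of just that pattern, using the same deprecated doesBucketExist call as the example; the credentials, region, bucket, and key names are placeholders:

import java.io.File;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.PutObjectRequest;

public class StagingUploadSketch {
  public static void stageJar(AmazonS3 s3, String bucket, String key, File jar) {
    // create the staging bucket on first use
    if (!s3.doesBucketExist(bucket)) {
      s3.createBucket(bucket);
    }
    // S3 deletes are idempotent, so removing a stale copy is safe even if the key is absent
    s3.deleteObject(bucket, key);
    s3.putObject(new PutObjectRequest(bucket, key, jar));
  }

  public static void main(String[] args) {
    // placeholder credentials and names, for illustration only
    AmazonS3 s3 = AmazonS3ClientBuilder.standard()
        .withRegion(Regions.US_EAST_1)
        .withCredentials(new AWSStaticCredentialsProvider(
            new BasicAWSCredentials("ACCESS_KEY", "SECRET_KEY")))
        .build();
    stageJar(s3, "my-staging-bucket", "custom-emr.jar", new File("/tmp/custom-emr.jar"));
  }
}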
From source file:thinkbig.util.Util.java
License:Open Source License
/**
 * Returns (and prints) all objects in an S3 bucket as a list of Strings
 * (only non-empty objects if excludeBlanks is true), or null if no such bucket exists.
 * @param s3
 * @param bucketName
 * @param excludeBlanks
 * @return
 */
public static List<String> getAllObjectKeysInBucket(AmazonS3 s3, String bucketName, boolean excludeBlanks) {
    // check if the client is valid
    if (s3 == null) {
      System.out.println("Not a valid S3 Client");
      return null;
    }
    // check if the bucket exists
    if (!s3.doesBucketExist(bucketName)) {
      System.out.println("The bucket '" + bucketName + "' does not exist!");
      return null;
    }
    System.out.println("Listing objects in bucket '" + bucketName + "' ");
    ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(bucketName));
    if (objectListing == null) {
      return null;
    }
    List<String> objectKeys = new ArrayList<String>();
    for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
      String key = objectSummary.getKey();
      if (!excludeBlanks || objectSummary.getSize() > 0) {
        objectKeys.add(key);
        System.out.println(" - " + key + " (size = " + objectSummary.getSize() + ")");
      }
    }
    return objectKeys;
}
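One caveat: listObjects returns a single page of results (at most 1,000 keys by default), so the helper above only reports the first batch of a large bucket. A sketch of the pagination loop that would make the listing complete, assuming the same client setup:

import java.util.ArrayList;
import java.util.List;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class ListAllKeysSketch {
  public static List<String> listAllKeys(AmazonS3 s3, String bucketName) {
    List<String> keys = new ArrayList<String>();
    ObjectListing listing = s3.listObjects(new ListObjectsRequest().withBucketName(bucketName));
    while (true) {
      for (S3ObjectSummary summary : listing.getObjectSummaries()) {
        keys.add(summary.getKey());
      }
      if (!listing.isTruncated()) {
        break;
      }
      // fetch the next page using the marker carried by the previous listing
      listing = s3.listNextBatchOfObjects(listing);
    }
    return keys;
  }
}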
From source file:thinkbig.util.Util.java
License:Open Source License
/**
 * Check if all the S3 URIs exist.
 * @param s3
 * @param s3Uris
 * @return true, if all URIs in the list exist
 */
public static boolean checkIfUrisExist(AmazonS3 s3, List<String> s3Uris) {
    // check if the client is valid
    if (s3 == null) {
      System.out.println("Not a valid S3 Client");
      return false;
    }
    boolean found = true;
    for (String s3Uri : s3Uris) {
      Matcher matcher = s3UriPattern.matcher(s3Uri);
      if (matcher.find()) {
        String bucket = matcher.group(1);
        String key = matcher.group(2);
        // short-circuit (&&) so getObject is not attempted on a bucket that does not exist
        found &= s3.doesBucketExist(bucket) && (s3.getObject(bucket, key) != null);
        if (!found) {
          return false;
        }
      } else {
        return false;
      }
    }
    return true;
}
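Note that getObject downloads the object body and throws an exception, rather than returning null, when the key is missing, so the check above is heavyweight and a missing key surfaces as an error instead of a false result. A hedged sketch of a lighter alternative built on doesObjectExist from the same AmazonS3 interface; the URI pattern here is an assumed stand-in for the example's s3UriPattern:

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.amazonaws.services.s3.AmazonS3;

public class UriExistenceSketch {
  // assumed equivalent of the example's s3UriPattern: "s3://bucket/key"
  private static final Pattern S3_URI = Pattern.compile("^s3://([^/]+)/(.+)$");

  public static boolean allUrisExist(AmazonS3 s3, List<String> s3Uris) {
    if (s3 == null) {
      return false;
    }
    for (String uri : s3Uris) {
      Matcher m = S3_URI.matcher(uri);
      if (!m.find()) {
        return false;
      }
      // doesObjectExist issues a metadata (HEAD) request instead of downloading the object,
      // and returns false rather than throwing when the key is absent
      if (!s3.doesBucketExist(m.group(1)) || !s3.doesObjectExist(m.group(1), m.group(2))) {
        return false;
      }
    }
    return true;
  }
}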