List of usage examples for com.amazonaws.services.s3.model.S3Object#getObjectContent
public S3ObjectInputStream getObjectContent()
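Before the full examples, here is a minimal, self-contained usage sketch of the pattern they all share: call getObjectContent(), read the returned stream promptly, and close the S3Object so the underlying HTTP connection is released. The bucket name "my-bucket" and key "my-key" are placeholders, not values taken from the examples below.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;

public class GetObjectContentSketch {
    public static void main(String[] args) throws IOException {
        AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
        S3Object object = s3.getObject(new GetObjectRequest("my-bucket", "my-key"));
        try {
            InputStream in = object.getObjectContent();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buf = new byte[8192];
            int n;
            // Drain the stream fully; the connection stays open until the data is read or the stream is closed.
            while ((n = in.read(buf)) != -1) {
                out.write(buf, 0, n);
            }
            System.out.println("read " + out.size() + " bytes");
        } finally {
            // Closing the S3Object also closes the content stream and releases the HTTP connection.
            object.close();
        }
    }
}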
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Reads the full object body into memory, verifies the md5sum user-metadata entry if present,
// optionally decrypts and decompresses the payload, and returns it as a comma-separated list of hashes.
private String[] getStrings(S3Object sobj) throws IOException {
    this.s3clientLock.readLock().lock();
    try {
        boolean encrypt = false;
        boolean compress = false;
        boolean lz4compress = false;
        int cl = (int) sobj.getObjectMetadata().getContentLength();
        byte[] data = new byte[cl];
        DataInputStream in = null;
        try {
            in = new DataInputStream(sobj.getObjectContent());
            in.readFully(data);
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            try {
                in.close();
            } catch (Exception e) {
            }
        }
        Map<String, String> mp = this.getUserMetaData(sobj.getObjectMetadata());
        if (mp.containsKey("md5sum")) {
            try {
                byte[] shash = BaseEncoding.base64().decode(mp.get("md5sum"));
                byte[] chash;
                chash = ServiceUtils.computeMD5Hash(data);
                if (!Arrays.equals(shash, chash))
                    throw new IOException("download corrupt at " + sobj.getKey());
            } catch (NoSuchAlgorithmException e) {
                throw new IOException(e);
            }
        }
        int size = Integer.parseInt((String) mp.get("size"));
        if (mp.containsKey("encrypt")) {
            encrypt = Boolean.parseBoolean((String) mp.get("encrypt"));
        }
        if (mp.containsKey("compress")) {
            compress = Boolean.parseBoolean((String) mp.get("compress"));
        } else if (mp.containsKey("lz4compress")) {
            lz4compress = Boolean.parseBoolean((String) mp.get("lz4compress"));
        }
        byte[] ivb = null;
        if (mp.containsKey("ivspec"))
            ivb = BaseEncoding.base64().decode(mp.get("ivspec"));
        if (encrypt) {
            if (ivb != null)
                data = EncryptUtils.decryptCBC(data, new IvParameterSpec(ivb));
            else
                data = EncryptUtils.decryptCBC(data);
        }
        if (compress)
            data = CompressionUtils.decompressZLIB(data);
        else if (lz4compress) {
            data = CompressionUtils.decompressLz4(data, size);
        }
        String hast = new String(data);
        SDFSLogger.getLog().debug("reading hashes " + (String) mp.get("hashes") + " from " + sobj.getKey());
        String[] st = hast.split(",");
        return st;
    } finally {
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Downloads a byte range of a stored block using a ranged GetObjectRequest and reads the content
// stream fully into a byte array. An InvalidObjectState error (archived object) is rethrown as
// DataArchivedException.
public byte[] getBytes(long id, int from, int to) throws IOException, DataArchivedException {
    // SDFSLogger.getLog().info("Downloading " + id);
    // SDFSLogger.getLog().info("Current readers :" + rr.incrementAndGet());
    String haName = EncyptUtils.encHashArchiveName(id, Main.chunkStoreEncryptionEnabled);
    this.s3clientLock.readLock().lock();
    S3Object sobj = null;
    byte[] data = null;
    // int ol = 0;
    try {
        long tm = System.currentTimeMillis();
        // ObjectMetadata omd = s3Service.getObjectMetadata(this.name, "blocks/" + haName);
        // Map<String, String> mp = this.getUserMetaData(omd);
        // ol = Integer.parseInt(mp.get("compressedsize"));
        // if (ol <= to) {
        //     to = ol;
        //     SDFSLogger.getLog().info("change to=" + to);
        // }
        int cl = (int) to - from;
        GetObjectRequest gr = new GetObjectRequest(this.name, "blocks/" + haName);
        gr.setRange(from, to);
        sobj = s3Service.getObject(gr);
        InputStream in = sobj.getObjectContent();
        data = new byte[cl];
        IOUtils.readFully(in, data);
        IOUtils.closeQuietly(in);
        double dtm = (System.currentTimeMillis() - tm) / 1000d;
        double bps = (cl / 1024) / dtm;
        SDFSLogger.getLog().debug("read [" + id + "] at " + bps + " kbps");
        // mp = this.getUserMetaData(omd);
        /*
         * try { mp.put("lastaccessed", Long.toString(System.currentTimeMillis()));
         * omd.setUserMetadata(mp); CopyObjectRequest req = new CopyObjectRequest(this.name,
         * "blocks/" + haName, this.name, "blocks/" + haName).withNewObjectMetadata(omd);
         * s3Service.copyObject(req); } catch (Exception e) {
         * SDFSLogger.getLog().debug("error setting last accessed", e); }
         */
        /*
         * if (mp.containsKey("deleted")) { boolean del = Boolean.parseBoolean((String) mp.get("deleted"));
         * if (del) { S3Object kobj = s3Service.getObject(this.name, "keys/" + haName);
         *
         * int claims = this.getClaimedObjects(kobj, id);
         *
         * int delobj = 0; if (mp.containsKey("deleted-objects")) {
         * delobj = Integer.parseInt((String) mp.get("deleted-objects")) - claims;
         * if (delobj < 0) delobj = 0; } mp.remove("deleted");
         * mp.put("deleted-objects", Integer.toString(delobj)); mp.put("suspect", "true");
         * omd.setUserMetadata(mp); CopyObjectRequest req = new CopyObjectRequest(this.name,
         * "keys/" + haName, this.name, "keys/" + haName).withNewObjectMetadata(omd);
         * s3Service.copyObject(req); int _size = Integer.parseInt((String) mp.get("size"));
         * int _compressedSize = Integer.parseInt((String) mp.get("compressedsize"));
         * HashBlobArchive.currentLength.addAndGet(_size);
         * HashBlobArchive.compressedLength.addAndGet(_compressedSize);
         * SDFSLogger.getLog().warn("Reclaimed [" + claims + "] blocks marked for deletion");
         * kobj.close(); } }
         */
        dtm = (System.currentTimeMillis() - tm) / 1000d;
        bps = (cl / 1024) / dtm;
    } catch (AmazonS3Exception e) {
        if (e.getErrorCode().equalsIgnoreCase("InvalidObjectState"))
            throw new DataArchivedException(id, null);
        else {
            SDFSLogger.getLog().error(
                    "unable to get block [" + id + "] at [blocks/" + haName + "] pos " + from + " to " + to, e);
            throw e;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        try {
            if (sobj != null) {
                sobj.close();
            }
        } catch (Exception e) {
        }
        this.s3clientLock.readLock().unlock();
    }
    return data;
}
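The method above pairs getObjectContent() with a ranged GetObjectRequest. Below is a distilled sketch of just that pattern as a hypothetical helper; the client, bucket, and key parameters are placeholders rather than names from BatchAwsS3ChunkStore, and it uses java.io.DataInputStream plus the same com.amazonaws.services.s3.model classes as the example.

// Hypothetical helper illustrating a ranged read; not part of the class above.
static byte[] readRange(AmazonS3 s3Client, String bucket, String key, long from, long to) throws IOException {
    GetObjectRequest gr = new GetObjectRequest(bucket, key);
    gr.setRange(from, to); // setRange takes an inclusive byte range
    S3Object part = s3Client.getObject(gr);
    try {
        byte[] data = new byte[(int) (to - from + 1)];
        // Read exactly the requested number of bytes from the content stream.
        new DataInputStream(part.getObjectContent()).readFully(data);
        return data;
    } finally {
        part.close(); // closes the content stream and releases the connection
    }
}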
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Downloads a block object to a local file (single GET or multipart download), verifies its MD5 if
// enabled, refreshes the last-accessed user metadata, and reclaims blocks previously marked deleted.
private void getData(long id, File f) throws Exception {
    // SDFSLogger.getLog().info("Downloading " + id);
    // SDFSLogger.getLog().info("Current readers :" + rr.incrementAndGet());
    String haName = EncyptUtils.encHashArchiveName(id, Main.chunkStoreEncryptionEnabled);
    this.s3clientLock.readLock().lock();
    S3Object sobj = null;
    try {
        long tm = System.currentTimeMillis();
        ObjectMetadata omd = s3Service.getObjectMetadata(this.name, "blocks/" + haName);
        try {
            sobj = s3Service.getObject(this.name, "blocks/" + haName);
        } catch (Exception e) {
            throw new IOException(e);
        }
        int cl = (int) omd.getContentLength();
        if (this.simpleS3) {
            FileOutputStream out = null;
            InputStream in = null;
            try {
                out = new FileOutputStream(f);
                in = sobj.getObjectContent();
                IOUtils.copy(in, out);
                out.flush();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                IOUtils.closeQuietly(out);
                IOUtils.closeQuietly(in);
            }
        } else {
            this.multiPartDownload("blocks/" + haName, f);
        }
        double dtm = (System.currentTimeMillis() - tm) / 1000d;
        double bps = (cl / 1024) / dtm;
        SDFSLogger.getLog().debug("read [" + id + "] at " + bps + " kbps");
        Map<String, String> mp = this.getUserMetaData(omd);
        if (md5sum && mp.containsKey("md5sum")) {
            byte[] shash = BaseEncoding.base64().decode(mp.get("md5sum"));
            InputStream in = new FileInputStream(f);
            byte[] chash = ServiceUtils.computeMD5Hash(in);
            IOUtils.closeQuietly(in);
            if (!Arrays.equals(shash, chash))
                throw new IOException("download corrupt at " + id);
        }
        try {
            mp.put("lastaccessed", Long.toString(System.currentTimeMillis()));
            omd.setUserMetadata(mp);
            updateObject("blocks/" + haName, omd);
        } catch (Exception e) {
            SDFSLogger.getLog().debug("error setting last accessed", e);
        }
        if (mp.containsKey("deleted")) {
            boolean del = Boolean.parseBoolean((String) mp.get("deleted"));
            if (del) {
                S3Object kobj = s3Service.getObject(this.name, "keys/" + haName);
                int claims = this.getClaimedObjects(kobj, id);
                int delobj = 0;
                if (mp.containsKey("deleted-objects")) {
                    delobj = Integer.parseInt((String) mp.get("deleted-objects")) - claims;
                    if (delobj < 0)
                        delobj = 0;
                }
                mp.remove("deleted");
                mp.put("deleted-objects", Integer.toString(delobj));
                mp.put("suspect", "true");
                omd.setUserMetadata(mp);
                updateObject("keys/" + haName, omd);
                int _size = Integer.parseInt((String) mp.get("size"));
                int _compressedSize = Integer.parseInt((String) mp.get("compressedsize"));
                HashBlobArchive.currentLength.addAndGet(_size);
                HashBlobArchive.compressedLength.addAndGet(_compressedSize);
                SDFSLogger.getLog().warn("Reclaimed [" + claims + "] blocks marked for deletion");
                kobj.close();
            }
        }
        dtm = (System.currentTimeMillis() - tm) / 1000d;
        bps = (cl / 1024) / dtm;
    } catch (AmazonS3Exception e) {
        if (e.getErrorCode().equalsIgnoreCase("InvalidObjectState"))
            throw new DataArchivedException(id, null);
        else {
            SDFSLogger.getLog().error("unable to get block [" + id + "] at [blocks/" + haName + "]", e);
            throw e;
        }
    } finally {
        try {
            if (sobj != null) {
                sobj.close();
            }
        } catch (Exception e) {
        }
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Reads a key object, verifies its MD5 if present, optionally decrypts and LZ4-decompresses the
// payload, and returns the contained hash list and archive id as a StringResult, clearing any
// stale "deleted" markers in the object's user metadata.
public StringResult getStringResult(String key) throws IOException, InterruptedException {
    this.s3clientLock.readLock().lock();
    S3Object sobj = null;
    try {
        ObjectMetadata md = null;
        try {
            sobj = s3Service.getObject(getName(), key);
            md = s3Service.getObjectMetadata(this.name, key);
        } catch (Exception e) {
            throw new IOException(e);
        }
        int cl = (int) md.getContentLength();
        byte[] data = new byte[cl];
        DataInputStream in = null;
        try {
            in = new DataInputStream(sobj.getObjectContent());
            in.readFully(data);
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            if (in != null)
                in.close();
        }
        boolean encrypt = false;
        boolean compress = false;
        boolean lz4compress = false;
        Map<String, String> mp = this.getUserMetaData(md);
        byte[] ivb = null;
        if (mp.containsKey("ivspec")) {
            ivb = BaseEncoding.base64().decode(mp.get("ivspec"));
        }
        if (mp.containsKey("md5sum")) {
            try {
                byte[] shash = BaseEncoding.base64().decode(mp.get("md5sum"));
                byte[] chash = ServiceUtils.computeMD5Hash(data);
                if (!Arrays.equals(shash, chash))
                    throw new IOException("download corrupt at " + sobj.getKey());
            } catch (NoSuchAlgorithmException e) {
                throw new IOException(e);
            }
        }
        int size = Integer.parseInt(mp.get("size"));
        encrypt = Boolean.parseBoolean(mp.get("encrypt"));
        lz4compress = Boolean.parseBoolean(mp.get("lz4compress"));
        boolean changed = false;
        Long hid = EncyptUtils.decHashArchiveName(sobj.getKey().substring(5), encrypt);
        if (this.clustered)
            mp = s3Service.getObjectMetadata(this.name, this.getClaimName(hid)).getUserMetadata();
        if (mp.containsKey("deleted")) {
            mp.remove("deleted");
            changed = true;
        }
        if (mp.containsKey("deleted-objects")) {
            mp.remove("deleted-objects");
            changed = true;
        }
        if (encrypt) {
            if (ivb != null) {
                data = EncryptUtils.decryptCBC(data, new IvParameterSpec(ivb));
            } else {
                data = EncryptUtils.decryptCBC(data);
            }
        }
        if (compress)
            data = CompressionUtils.decompressZLIB(data);
        else if (lz4compress) {
            data = CompressionUtils.decompressLz4(data, size);
        }
        String hast = new String(data);
        SDFSLogger.getLog().debug("reading hashes " + (String) mp.get("objects") + " from " + hid + " encn "
                + sobj.getKey().substring(5));
        StringTokenizer ht = new StringTokenizer(hast, ",");
        StringResult st = new StringResult();
        st.id = hid;
        st.st = ht;
        if (mp.containsKey("bsize")) {
            HashBlobArchive.currentLength.addAndGet(Integer.parseInt(mp.get("bsize")));
        }
        if (mp.containsKey("bcompressedsize")) {
            HashBlobArchive.compressedLength.addAndGet(Integer.parseInt(mp.get("bcompressedsize")));
        }
        if (changed) {
            try {
                md = sobj.getObjectMetadata();
                md.setUserMetadata(mp);
                String kn = null;
                if (this.clustered)
                    kn = this.getClaimName(hid);
                else
                    kn = sobj.getKey();
                this.updateObject(kn, md);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
        return st;
    } finally {
        if (sobj != null)
            sobj.close();
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Downloads an object into a staging file (single GET or multipart download), validates it against
// the stored MD5, then decrypts and decompresses it as indicated by the object's user metadata
// before restoring it to its destination as a file, directory, or symlink.
@Override
public void downloadFile(String nm, File to, String pp) throws IOException {
    this.s3clientLock.readLock().lock();
    try {
        while (nm.startsWith(File.separator))
            nm = nm.substring(1);
        String rnd = RandomGUID.getGuid();
        File p = new File(this.staged_sync_location, rnd);
        File z = new File(this.staged_sync_location, rnd + ".uz");
        File e = new File(this.staged_sync_location, rnd + ".de");
        while (z.exists()) {
            rnd = RandomGUID.getGuid();
            p = new File(this.staged_sync_location, rnd);
            z = new File(this.staged_sync_location, rnd + ".uz");
            e = new File(this.staged_sync_location, rnd + ".de");
        }
        if (nm.startsWith(File.separator))
            nm = nm.substring(1);
        String haName = EncyptUtils.encString(nm, Main.chunkStoreEncryptionEnabled);
        Map<String, String> mp = null;
        byte[] shash = null;
        try {
            if (this.simpleS3) {
                S3Object obj = null;
                SDFSLogger.getLog().debug("downloading " + pp + "/" + haName);
                obj = s3Service.getObject(this.name, pp + "/" + haName);
                BufferedInputStream in = new BufferedInputStream(obj.getObjectContent());
                BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(p));
                IOUtils.copy(in, out);
                out.flush();
                out.close();
                in.close();
                ObjectMetadata omd = s3Service.getObjectMetadata(name, pp + "/" + haName);
                mp = this.getUserMetaData(omd);
                SDFSLogger.getLog().debug("mp sz=" + mp.size());
                try {
                    if (obj != null)
                        obj.close();
                } catch (Exception e1) {
                }
            } else {
                SDFSLogger.getLog().debug("downloading " + pp + "/" + haName);
                this.multiPartDownload(pp + "/" + haName, p);
                ObjectMetadata omd = s3Service.getObjectMetadata(name, pp + "/" + haName);
                mp = this.getUserMetaData(omd);
                if (md5sum && mp.containsKey("md5sum")) {
                    shash = BaseEncoding.base64().decode(omd.getUserMetaDataOf("md5sum"));
                }
            }
            if (shash != null && !FileUtils.fileValid(p, shash))
                throw new IOException("file " + p.getPath() + " is corrupt");
            boolean encrypt = false;
            boolean lz4compress = false;
            if (mp.containsKey("encrypt")) {
                encrypt = Boolean.parseBoolean(mp.get("encrypt"));
            }
            if (mp.containsKey("lz4compress")) {
                lz4compress = Boolean.parseBoolean(mp.get("lz4compress"));
            }
            byte[] ivb = null;
            if (mp.containsKey("ivspec")) {
                ivb = BaseEncoding.base64().decode(mp.get("ivspec"));
            }
            SDFSLogger.getLog().debug("compress=" + lz4compress + " " + mp.get("lz4compress"));
            if (mp.containsKey("symlink")) {
                if (OSValidator.isWindows())
                    throw new IOException("unable to restore symlinks to windows");
                else {
                    String spth = EncyptUtils.decString(mp.get("symlink"), encrypt);
                    Path srcP = Paths.get(spth);
                    Path dstP = Paths.get(to.getPath());
                    Files.createSymbolicLink(dstP, srcP);
                }
            } else if (mp.containsKey("directory")) {
                to.mkdirs();
                FileUtils.setFileMetaData(to, mp, encrypt);
                p.delete();
            } else {
                if (encrypt) {
                    if (ivb != null) {
                        EncryptUtils.decryptFile(p, e, new IvParameterSpec(ivb));
                    } else {
                        EncryptUtils.decryptFile(p, e);
                    }
                    p.delete();
                    p = e;
                }
                if (lz4compress) {
                    CompressionUtils.decompressFile(p, z);
                    p.delete();
                    p = z;
                }
                File parent = to.getParentFile();
                if (!parent.exists())
                    parent.mkdirs();
                BufferedInputStream is = new BufferedInputStream(new FileInputStream(p));
                BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(to));
                IOUtils.copy(is, os);
                os.flush();
                os.close();
                is.close();
                FileUtils.setFileMetaData(to, mp, encrypt);
                SDFSLogger.getLog().debug("updated " + to + " sz=" + to.length());
            }
        } catch (Exception e1) {
            throw new IOException(e1);
        } finally {
            p.delete();
            z.delete();
            e.delete();
        }
    } finally {
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.p365.S3Sample.java
License:Open Source License
public static void main(String[] args) throws IOException {
    /*
     * This credentials provider implementation loads your AWS credentials
     * from a properties file at the root of your classpath.
     *
     * Important: Be sure to fill in your AWS access credentials in the
     * AwsCredentials.properties file before you try to run this sample.
     * http://aws.amazon.com/security-credentials
     */
    AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "mynewbuket";
    String key = "Myobj/sd.jpg";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        if (!s3.doesBucketExist(bucketName)) {
            s3.createBucket(bucketName);
        }

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        String pathname = "D:\\Program Files\\apache-tomcat-7.0.42\\webapps\\WorkerForP365\\src\\AAA_1465.jpg";
        File file = new File(pathname);
        s3.putObject(
                new PutObjectRequest(bucketName, key, file).withCannedAcl(CannedAccessControlList.PublicRead));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        //System.out.println("Deleting an object\n");
        //s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        //System.out.println("Deleting bucket " + bucketName + "\n");
        //s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file:org.pentaho.amazon.client.impl.S3ClientImpl.java
License:Apache License
private String readLogFromS3(String stagingBucketName, String key) {
    Scanner logScanner = null;
    S3ObjectInputStream s3ObjectInputStream = null;
    GZIPInputStream gzipInputStream = null;
    String lineSeparator = System.getProperty("line.separator");
    StringBuilder logContents = new StringBuilder();
    S3Object outObject;
    try {
        if (s3Client.doesObjectExist(stagingBucketName, key)) {
            outObject = s3Client.getObject(stagingBucketName, key);
            s3ObjectInputStream = outObject.getObjectContent();
            gzipInputStream = new GZIPInputStream(s3ObjectInputStream);
            logScanner = new Scanner(gzipInputStream);
            while (logScanner.hasNextLine()) {
                logContents.append(logScanner.nextLine() + lineSeparator);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (logScanner != null) {
                logScanner.close();
            }
            if (s3ObjectInputStream != null) {
                s3ObjectInputStream.close();
            }
            if (gzipInputStream != null) {
                gzipInputStream.close();
            }
        } catch (IOException e) {
            // do nothing
        }
    }
    return logContents.toString();
}
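The same gzipped-log read could also be structured with try-with-resources (Java 7+) so the scanner, GZIP stream, and S3 content stream are closed even on error. This is only a rewrite sketch of the method above, not the project's code: it assumes the same s3Client field and imports, and it propagates IOException instead of printing the stack trace.

// Sketch: try-with-resources variant of the method above (hypothetical alternative formulation).
private String readLogFromS3(String stagingBucketName, String key) throws IOException {
    String lineSeparator = System.getProperty("line.separator");
    StringBuilder logContents = new StringBuilder();
    if (s3Client.doesObjectExist(stagingBucketName, key)) {
        S3Object outObject = s3Client.getObject(stagingBucketName, key);
        try (S3ObjectInputStream s3In = outObject.getObjectContent();
                GZIPInputStream gzipIn = new GZIPInputStream(s3In);
                Scanner logScanner = new Scanner(gzipIn)) {
            // Streams are closed in reverse declaration order when the block exits.
            while (logScanner.hasNextLine()) {
                logContents.append(logScanner.nextLine()).append(lineSeparator);
            }
        }
    }
    return logContents.toString();
}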
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);
        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING";

                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent",
                        //     setupPercent, mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/**
 * Executes a Hive job into the AWS Elastic MapReduce service.
 */
public Result execute(Result result, int arg1) throws KettleException {
    // Setup a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // Create and connect an AWS service.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

        // Prepare staging S3 URL for Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$
            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }
        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }
            // pull down .q file from VFS
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
            IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut);

            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }

            // delete the previous .q file in S3
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }

            // Put .q file in S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }

        // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for
        // Hive EMR job.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create an EMR job flow, start a step to setup Hive and get the job flow ID.
            runJobFlowRequest = createJobFlow();
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }

        // Now EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);

        // Configure a HadoopJarStep.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);

        // Set a logging interval.
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }

        // monitor and log if intended.
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING"; //$NON-NLS-1$

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);

                        if (parentJob.isStopped()) {
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }

                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            logError(Const.getStackTracker(ie));
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pieShare.pieDrive.adapter.s3.S3Adapter.java
// Streams an object's content into the supplied OutputStream in 1 KB chunks, honoring thread interruption.
@Override
public void download(PieDriveFile file, OutputStream stream) throws AdaptorException {
    byte[] buf = new byte[1024];
    int count = 0;
    S3Object object = s3Auth.getClient().getObject(new GetObjectRequest(bucketName, file.getUuid()));
    InputStream objectData = object.getObjectContent();
    try {
        while ((count = objectData.read(buf)) != -1) {
            if (Thread.interrupted()) {
                throw new AdaptorException("Download interrupted.");
            }
            stream.write(buf, 0, count);
        }
        stream.close();
        objectData.close();
        PieLogger.trace(S3Adapter.class, "{} downloaded", file.getUuid());
    } catch (IOException e) {
        throw new AdaptorException(e);
    } catch (AmazonServiceException ase) {
        throw new AdaptorException(ase);
    } catch (AmazonClientException ace) {
        throw new AdaptorException(ace);
    }
}