List of usage examples for com.amazonaws.services.s3.model.S3Object#getObjectContent
public S3ObjectInputStream getObjectContent()
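Before the full examples, here is a minimal, self-contained usage sketch of the pattern they all share: call getObjectContent(), read the returned stream promptly, and close the S3Object so the underlying HTTP connection is released. The bucket name "my-bucket" and key "my-key" are placeholders, not values taken from the examples below.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;

public class GetObjectContentSketch {
    public static void main(String[] args) throws IOException {
        AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
        S3Object object = s3.getObject(new GetObjectRequest("my-bucket", "my-key"));
        try {
            InputStream in = object.getObjectContent();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buf = new byte[8192];
            int n;
            // Drain the stream fully; the connection stays open until the data is read or the stream is closed.
            while ((n = in.read(buf)) != -1) {
                out.write(buf, 0, n);
            }
            System.out.println("read " + out.size() + " bytes");
        } finally {
            // Closing the S3Object also closes the content stream and releases the HTTP connection.
            object.close();
        }
    }
}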
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Reads the full object body into memory, verifies the md5sum user-metadata entry if present,
// optionally decrypts and decompresses the payload, and returns it as a comma-separated list of hashes.
private String[] getStrings(S3Object sobj) throws IOException {
    this.s3clientLock.readLock().lock();
    try {
        boolean encrypt = false;
        boolean compress = false;
        boolean lz4compress = false;
        int cl = (int) sobj.getObjectMetadata().getContentLength();
        byte[] data = new byte[cl];
        DataInputStream in = null;
        try {
            in = new DataInputStream(sobj.getObjectContent());
            in.readFully(data);
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            try {
                in.close();
            } catch (Exception e) {
            }
        }
        Map<String, String> mp = this.getUserMetaData(sobj.getObjectMetadata());
        if (mp.containsKey("md5sum")) {
            try {
                byte[] shash = BaseEncoding.base64().decode(mp.get("md5sum"));
                byte[] chash;
                chash = ServiceUtils.computeMD5Hash(data);
                if (!Arrays.equals(shash, chash))
                    throw new IOException("download corrupt at " + sobj.getKey());
            } catch (NoSuchAlgorithmException e) {
                throw new IOException(e);
            }
        }
        int size = Integer.parseInt((String) mp.get("size"));
        if (mp.containsKey("encrypt")) {
            encrypt = Boolean.parseBoolean((String) mp.get("encrypt"));
        }
        if (mp.containsKey("compress")) {
            compress = Boolean.parseBoolean((String) mp.get("compress"));
        } else if (mp.containsKey("lz4compress")) {
            lz4compress = Boolean.parseBoolean((String) mp.get("lz4compress"));
        }
        byte[] ivb = null;
        if (mp.containsKey("ivspec"))
            ivb = BaseEncoding.base64().decode(mp.get("ivspec"));
        if (encrypt) {
            if (ivb != null)
                data = EncryptUtils.decryptCBC(data, new IvParameterSpec(ivb));
            else
                data = EncryptUtils.decryptCBC(data);
        }
        if (compress)
            data = CompressionUtils.decompressZLIB(data);
        else if (lz4compress) {
            data = CompressionUtils.decompressLz4(data, size);
        }
        String hast = new String(data);
        SDFSLogger.getLog().debug("reading hashes " + (String) mp.get("hashes") + " from " + sobj.getKey());
        String[] st = hast.split(",");
        return st;
    } finally {
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Downloads a byte range of a stored block using a ranged GetObjectRequest and reads the content
// stream fully into a byte array. An InvalidObjectState error (archived object) is rethrown as
// DataArchivedException.
public byte[] getBytes(long id, int from, int to) throws IOException, DataArchivedException {
    // SDFSLogger.getLog().info("Downloading " + id);
    // SDFSLogger.getLog().info("Current readers :" + rr.incrementAndGet());
    String haName = EncyptUtils.encHashArchiveName(id, Main.chunkStoreEncryptionEnabled);
    this.s3clientLock.readLock().lock();
    S3Object sobj = null;
    byte[] data = null;
    // int ol = 0;
    try {
        long tm = System.currentTimeMillis();
        // ObjectMetadata omd = s3Service.getObjectMetadata(this.name, "blocks/" + haName);
        // Map<String, String> mp = this.getUserMetaData(omd);
        // ol = Integer.parseInt(mp.get("compressedsize"));
        // if (ol <= to) {
        //     to = ol;
        //     SDFSLogger.getLog().info("change to=" + to);
        // }
        int cl = (int) to - from;
        GetObjectRequest gr = new GetObjectRequest(this.name, "blocks/" + haName);
        gr.setRange(from, to);
        sobj = s3Service.getObject(gr);
        InputStream in = sobj.getObjectContent();
        data = new byte[cl];
        IOUtils.readFully(in, data);
        IOUtils.closeQuietly(in);
        double dtm = (System.currentTimeMillis() - tm) / 1000d;
        double bps = (cl / 1024) / dtm;
        SDFSLogger.getLog().debug("read [" + id + "] at " + bps + " kbps");
        // mp = this.getUserMetaData(omd);
        /*
         * try { mp.put("lastaccessed", Long.toString(System.currentTimeMillis()));
         * omd.setUserMetadata(mp); CopyObjectRequest req = new CopyObjectRequest(this.name,
         * "blocks/" + haName, this.name, "blocks/" + haName).withNewObjectMetadata(omd);
         * s3Service.copyObject(req); } catch (Exception e) {
         * SDFSLogger.getLog().debug("error setting last accessed", e); }
         */
        /*
         * if (mp.containsKey("deleted")) { boolean del = Boolean.parseBoolean((String) mp.get("deleted"));
         * if (del) { S3Object kobj = s3Service.getObject(this.name, "keys/" + haName);
         *
         * int claims = this.getClaimedObjects(kobj, id);
         *
         * int delobj = 0; if (mp.containsKey("deleted-objects")) {
         * delobj = Integer.parseInt((String) mp.get("deleted-objects")) - claims;
         * if (delobj < 0) delobj = 0; } mp.remove("deleted");
         * mp.put("deleted-objects", Integer.toString(delobj)); mp.put("suspect", "true");
         * omd.setUserMetadata(mp); CopyObjectRequest req = new CopyObjectRequest(this.name,
         * "keys/" + haName, this.name, "keys/" + haName).withNewObjectMetadata(omd);
         * s3Service.copyObject(req); int _size = Integer.parseInt((String) mp.get("size"));
         * int _compressedSize = Integer.parseInt((String) mp.get("compressedsize"));
         * HashBlobArchive.currentLength.addAndGet(_size);
         * HashBlobArchive.compressedLength.addAndGet(_compressedSize);
         * SDFSLogger.getLog().warn("Reclaimed [" + claims + "] blocks marked for deletion");
         * kobj.close(); } }
         */
        dtm = (System.currentTimeMillis() - tm) / 1000d;
        bps = (cl / 1024) / dtm;
    } catch (AmazonS3Exception e) {
        if (e.getErrorCode().equalsIgnoreCase("InvalidObjectState"))
            throw new DataArchivedException(id, null);
        else {
            SDFSLogger.getLog().error(
                    "unable to get block [" + id + "] at [blocks/" + haName + "] pos " + from + " to " + to, e);
            throw e;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        try {
            if (sobj != null) {
                sobj.close();
            }
        } catch (Exception e) {
        }
        this.s3clientLock.readLock().unlock();
    }
    return data;
}
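The method above pairs getObjectContent() with a ranged GetObjectRequest. Below is a distilled sketch of just that pattern as a hypothetical helper; the client, bucket, and key parameters are placeholders rather than names from BatchAwsS3ChunkStore, and it uses java.io.DataInputStream plus the same com.amazonaws.services.s3.model classes as the example.

// Hypothetical helper illustrating a ranged read; not part of the class above.
static byte[] readRange(AmazonS3 s3Client, String bucket, String key, long from, long to) throws IOException {
    GetObjectRequest gr = new GetObjectRequest(bucket, key);
    gr.setRange(from, to); // setRange takes an inclusive byte range
    S3Object part = s3Client.getObject(gr);
    try {
        byte[] data = new byte[(int) (to - from + 1)];
        // Read exactly the requested number of bytes from the content stream.
        new DataInputStream(part.getObjectContent()).readFully(data);
        return data;
    } finally {
        part.close(); // closes the content stream and releases the connection
    }
}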
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Downloads a block object to a local file (single GET or multipart download), verifies its MD5 if
// enabled, refreshes the last-accessed user metadata, and reclaims blocks previously marked deleted.
private void getData(long id, File f) throws Exception {
    // SDFSLogger.getLog().info("Downloading " + id);
    // SDFSLogger.getLog().info("Current readers :" + rr.incrementAndGet());
    String haName = EncyptUtils.encHashArchiveName(id, Main.chunkStoreEncryptionEnabled);
    this.s3clientLock.readLock().lock();
    S3Object sobj = null;
    try {
        long tm = System.currentTimeMillis();
        ObjectMetadata omd = s3Service.getObjectMetadata(this.name, "blocks/" + haName);
        try {
            sobj = s3Service.getObject(this.name, "blocks/" + haName);
        } catch (Exception e) {
            throw new IOException(e);
        }
        int cl = (int) omd.getContentLength();
        if (this.simpleS3) {
            FileOutputStream out = null;
            InputStream in = null;
            try {
                out = new FileOutputStream(f);
                in = sobj.getObjectContent();
                IOUtils.copy(in, out);
                out.flush();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                IOUtils.closeQuietly(out);
                IOUtils.closeQuietly(in);
            }
        } else {
            this.multiPartDownload("blocks/" + haName, f);
        }
        double dtm = (System.currentTimeMillis() - tm) / 1000d;
        double bps = (cl / 1024) / dtm;
        SDFSLogger.getLog().debug("read [" + id + "] at " + bps + " kbps");
        Map<String, String> mp = this.getUserMetaData(omd);
        if (md5sum && mp.containsKey("md5sum")) {
            byte[] shash = BaseEncoding.base64().decode(mp.get("md5sum"));
            InputStream in = new FileInputStream(f);
            byte[] chash = ServiceUtils.computeMD5Hash(in);
            IOUtils.closeQuietly(in);
            if (!Arrays.equals(shash, chash))
                throw new IOException("download corrupt at " + id);
        }
        try {
            mp.put("lastaccessed", Long.toString(System.currentTimeMillis()));
            omd.setUserMetadata(mp);
            updateObject("blocks/" + haName, omd);
        } catch (Exception e) {
            SDFSLogger.getLog().debug("error setting last accessed", e);
        }
        if (mp.containsKey("deleted")) {
            boolean del = Boolean.parseBoolean((String) mp.get("deleted"));
            if (del) {
                S3Object kobj = s3Service.getObject(this.name, "keys/" + haName);
                int claims = this.getClaimedObjects(kobj, id);
                int delobj = 0;
                if (mp.containsKey("deleted-objects")) {
                    delobj = Integer.parseInt((String) mp.get("deleted-objects")) - claims;
                    if (delobj < 0)
                        delobj = 0;
                }
                mp.remove("deleted");
                mp.put("deleted-objects", Integer.toString(delobj));
                mp.put("suspect", "true");
                omd.setUserMetadata(mp);
                updateObject("keys/" + haName, omd);
                int _size = Integer.parseInt((String) mp.get("size"));
                int _compressedSize = Integer.parseInt((String) mp.get("compressedsize"));
                HashBlobArchive.currentLength.addAndGet(_size);
                HashBlobArchive.compressedLength.addAndGet(_compressedSize);
                SDFSLogger.getLog().warn("Reclaimed [" + claims + "] blocks marked for deletion");
                kobj.close();
            }
        }
        dtm = (System.currentTimeMillis() - tm) / 1000d;
        bps = (cl / 1024) / dtm;
    } catch (AmazonS3Exception e) {
        if (e.getErrorCode().equalsIgnoreCase("InvalidObjectState"))
            throw new DataArchivedException(id, null);
        else {
            SDFSLogger.getLog().error("unable to get block [" + id + "] at [blocks/" + haName + "]", e);
            throw e;
        }
    } finally {
        try {
            if (sobj != null) {
                sobj.close();
            }
        } catch (Exception e) {
        }
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Reads a key object, verifies its MD5 if present, optionally decrypts and LZ4-decompresses the
// payload, and returns the contained hash list and archive id as a StringResult, clearing any
// stale "deleted" markers in the object's user metadata.
public StringResult getStringResult(String key) throws IOException, InterruptedException {
    this.s3clientLock.readLock().lock();
    S3Object sobj = null;
    try {
        ObjectMetadata md = null;
        try {
            sobj = s3Service.getObject(getName(), key);
            md = s3Service.getObjectMetadata(this.name, key);
        } catch (Exception e) {
            throw new IOException(e);
        }
        int cl = (int) md.getContentLength();
        byte[] data = new byte[cl];
        DataInputStream in = null;
        try {
            in = new DataInputStream(sobj.getObjectContent());
            in.readFully(data);
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            if (in != null)
                in.close();
        }
        boolean encrypt = false;
        boolean compress = false;
        boolean lz4compress = false;
        Map<String, String> mp = this.getUserMetaData(md);
        byte[] ivb = null;
        if (mp.containsKey("ivspec")) {
            ivb = BaseEncoding.base64().decode(mp.get("ivspec"));
        }
        if (mp.containsKey("md5sum")) {
            try {
                byte[] shash = BaseEncoding.base64().decode(mp.get("md5sum"));
                byte[] chash = ServiceUtils.computeMD5Hash(data);
                if (!Arrays.equals(shash, chash))
                    throw new IOException("download corrupt at " + sobj.getKey());
            } catch (NoSuchAlgorithmException e) {
                throw new IOException(e);
            }
        }
        int size = Integer.parseInt(mp.get("size"));
        encrypt = Boolean.parseBoolean(mp.get("encrypt"));
        lz4compress = Boolean.parseBoolean(mp.get("lz4compress"));
        boolean changed = false;
        Long hid = EncyptUtils.decHashArchiveName(sobj.getKey().substring(5), encrypt);
        if (this.clustered)
            mp = s3Service.getObjectMetadata(this.name, this.getClaimName(hid)).getUserMetadata();
        if (mp.containsKey("deleted")) {
            mp.remove("deleted");
            changed = true;
        }
        if (mp.containsKey("deleted-objects")) {
            mp.remove("deleted-objects");
            changed = true;
        }
        if (encrypt) {
            if (ivb != null) {
                data = EncryptUtils.decryptCBC(data, new IvParameterSpec(ivb));
            } else {
                data = EncryptUtils.decryptCBC(data);
            }
        }
        if (compress)
            data = CompressionUtils.decompressZLIB(data);
        else if (lz4compress) {
            data = CompressionUtils.decompressLz4(data, size);
        }
        String hast = new String(data);
        SDFSLogger.getLog().debug("reading hashes " + (String) mp.get("objects") + " from " + hid + " encn "
                + sobj.getKey().substring(5));
        StringTokenizer ht = new StringTokenizer(hast, ",");
        StringResult st = new StringResult();
        st.id = hid;
        st.st = ht;
        if (mp.containsKey("bsize")) {
            HashBlobArchive.currentLength.addAndGet(Integer.parseInt(mp.get("bsize")));
        }
        if (mp.containsKey("bcompressedsize")) {
            HashBlobArchive.compressedLength.addAndGet(Integer.parseInt(mp.get("bcompressedsize")));
        }
        if (changed) {
            try {
                md = sobj.getObjectMetadata();
                md.setUserMetadata(mp);
                String kn = null;
                if (this.clustered)
                    kn = this.getClaimName(hid);
                else
                    kn = sobj.getKey();
                this.updateObject(kn, md);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
        return st;
    } finally {
        if (sobj != null)
            sobj.close();
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.opendedup.sdfs.filestore.cloud.BatchAwsS3ChunkStore.java
License:Open Source License
// Downloads an object into a staging file (single GET or multipart download), validates it against
// the stored MD5, then decrypts and decompresses it as indicated by the object's user metadata
// before restoring it to its destination as a file, directory, or symlink.
@Override
public void downloadFile(String nm, File to, String pp) throws IOException {
    this.s3clientLock.readLock().lock();
    try {
        while (nm.startsWith(File.separator))
            nm = nm.substring(1);
        String rnd = RandomGUID.getGuid();
        File p = new File(this.staged_sync_location, rnd);
        File z = new File(this.staged_sync_location, rnd + ".uz");
        File e = new File(this.staged_sync_location, rnd + ".de");
        while (z.exists()) {
            rnd = RandomGUID.getGuid();
            p = new File(this.staged_sync_location, rnd);
            z = new File(this.staged_sync_location, rnd + ".uz");
            e = new File(this.staged_sync_location, rnd + ".de");
        }
        if (nm.startsWith(File.separator))
            nm = nm.substring(1);
        String haName = EncyptUtils.encString(nm, Main.chunkStoreEncryptionEnabled);
        Map<String, String> mp = null;
        byte[] shash = null;
        try {
            if (this.simpleS3) {
                S3Object obj = null;
                SDFSLogger.getLog().debug("downloading " + pp + "/" + haName);
                obj = s3Service.getObject(this.name, pp + "/" + haName);
                BufferedInputStream in = new BufferedInputStream(obj.getObjectContent());
                BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(p));
                IOUtils.copy(in, out);
                out.flush();
                out.close();
                in.close();
                ObjectMetadata omd = s3Service.getObjectMetadata(name, pp + "/" + haName);
                mp = this.getUserMetaData(omd);
                SDFSLogger.getLog().debug("mp sz=" + mp.size());
                try {
                    if (obj != null)
                        obj.close();
                } catch (Exception e1) {
                }
            } else {
                SDFSLogger.getLog().debug("downloading " + pp + "/" + haName);
                this.multiPartDownload(pp + "/" + haName, p);
                ObjectMetadata omd = s3Service.getObjectMetadata(name, pp + "/" + haName);
                mp = this.getUserMetaData(omd);
                if (md5sum && mp.containsKey("md5sum")) {
                    shash = BaseEncoding.base64().decode(omd.getUserMetaDataOf("md5sum"));
                }
            }
            if (shash != null && !FileUtils.fileValid(p, shash))
                throw new IOException("file " + p.getPath() + " is corrupt");
            boolean encrypt = false;
            boolean lz4compress = false;
            if (mp.containsKey("encrypt")) {
                encrypt = Boolean.parseBoolean(mp.get("encrypt"));
            }
            if (mp.containsKey("lz4compress")) {
                lz4compress = Boolean.parseBoolean(mp.get("lz4compress"));
            }
            byte[] ivb = null;
            if (mp.containsKey("ivspec")) {
                ivb = BaseEncoding.base64().decode(mp.get("ivspec"));
            }
            SDFSLogger.getLog().debug("compress=" + lz4compress + " " + mp.get("lz4compress"));
            if (mp.containsKey("symlink")) {
                if (OSValidator.isWindows())
                    throw new IOException("unable to restore symlinks to windows");
                else {
                    String spth = EncyptUtils.decString(mp.get("symlink"), encrypt);
                    Path srcP = Paths.get(spth);
                    Path dstP = Paths.get(to.getPath());
                    Files.createSymbolicLink(dstP, srcP);
                }
            } else if (mp.containsKey("directory")) {
                to.mkdirs();
                FileUtils.setFileMetaData(to, mp, encrypt);
                p.delete();
            } else {
                if (encrypt) {
                    if (ivb != null) {
                        EncryptUtils.decryptFile(p, e, new IvParameterSpec(ivb));
                    } else {
                        EncryptUtils.decryptFile(p, e);
                    }
                    p.delete();
                    p = e;
                }
                if (lz4compress) {
                    CompressionUtils.decompressFile(p, z);
                    p.delete();
                    p = z;
                }
                File parent = to.getParentFile();
                if (!parent.exists())
                    parent.mkdirs();
                BufferedInputStream is = new BufferedInputStream(new FileInputStream(p));
                BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(to));
                IOUtils.copy(is, os);
                os.flush();
                os.close();
                is.close();
                FileUtils.setFileMetaData(to, mp, encrypt);
                SDFSLogger.getLog().debug("updated " + to + " sz=" + to.length());
            }
        } catch (Exception e1) {
            throw new IOException(e1);
        } finally {
            p.delete();
            z.delete();
            e.delete();
        }
    } finally {
        this.s3clientLock.readLock().unlock();
    }
}
From source file:org.p365.S3Sample.java
License:Open Source License
public static void main(String[] args) throws IOException {
    /*
     * This credentials provider implementation loads your AWS credentials
     * from a properties file at the root of your classpath.
     *
     * Important: Be sure to fill in your AWS access credentials in the
     * AwsCredentials.properties file before you try to run this sample.
     * http://aws.amazon.com/security-credentials
     */
    AmazonS3 s3 = new AmazonS3Client(new ClasspathPropertiesFileCredentialsProvider());
    Region usWest2 = Region.getRegion(Regions.US_WEST_2);
    s3.setRegion(usWest2);

    String bucketName = "mynewbuket";
    String key = "Myobj/sd.jpg";

    System.out.println("===========================================");
    System.out.println("Getting Started with Amazon S3");
    System.out.println("===========================================\n");

    try {
        /*
         * Create a new S3 bucket - Amazon S3 bucket names are globally unique,
         * so once a bucket name has been taken by any user, you can't create
         * another bucket with that same name.
         *
         * You can optionally specify a location for your bucket if you want to
         * keep your data closer to your applications or users.
         */
        System.out.println("Creating bucket " + bucketName + "\n");
        if (!s3.doesBucketExist(bucketName)) {
            s3.createBucket(bucketName);
        }

        /*
         * List the buckets in your account
         */
        System.out.println("Listing buckets");
        for (Bucket bucket : s3.listBuckets()) {
            System.out.println(" - " + bucket.getName());
        }
        System.out.println();

        /*
         * Upload an object to your bucket - You can easily upload a file to
         * S3, or upload directly an InputStream if you know the length of
         * the data in the stream. You can also specify your own metadata
         * when uploading to S3, which allows you set a variety of options
         * like content-type and content-encoding, plus additional metadata
         * specific to your applications.
         */
        System.out.println("Uploading a new object to S3 from a file\n");
        String pathname = "D:\\Program Files\\apache-tomcat-7.0.42\\webapps\\WorkerForP365\\src\\AAA_1465.jpg";
        File file = new File(pathname);
        s3.putObject(
                new PutObjectRequest(bucketName, key, file).withCannedAcl(CannedAccessControlList.PublicRead));

        /*
         * Download an object - When you download an object, you get all of
         * the object's metadata and a stream from which to read the contents.
         * It's important to read the contents of the stream as quickly as
         * possible since the data is streamed directly from Amazon S3 and your
         * network connection will remain open until you read all the data or
         * close the input stream.
         *
         * GetObjectRequest also supports several other options, including
         * conditional downloading of objects based on modification times,
         * ETags, and selectively downloading a range of an object.
         */
        System.out.println("Downloading an object");
        S3Object object = s3.getObject(new GetObjectRequest(bucketName, key));
        System.out.println("Content-Type: " + object.getObjectMetadata().getContentType());
        displayTextInputStream(object.getObjectContent());

        /*
         * List objects in your bucket by prefix - There are many options for
         * listing the objects in your bucket. Keep in mind that buckets with
         * many objects might truncate their results when listing their objects,
         * so be sure to check if the returned object listing is truncated, and
         * use the AmazonS3.listNextBatchOfObjects(...) operation to retrieve
         * additional results.
         */
        System.out.println("Listing objects");
        ObjectListing objectListing = s3
                .listObjects(new ListObjectsRequest().withBucketName(bucketName).withPrefix("My"));
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(
                    " - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")");
        }
        System.out.println();

        /*
         * Delete an object - Unless versioning has been turned on for your bucket,
         * there is no way to undelete an object, so use caution when deleting objects.
         */
        //System.out.println("Deleting an object\n");
        //s3.deleteObject(bucketName, key);

        /*
         * Delete a bucket - A bucket must be completely empty before it can be
         * deleted, so remember to delete any objects from your buckets before
         * you try to delete them.
         */
        //System.out.println("Deleting bucket " + bucketName + "\n");
        //s3.deleteBucket(bucketName);
    } catch (AmazonServiceException ase) {
        System.out.println("Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.");
        System.out.println("Error Message: " + ase.getMessage());
        System.out.println("HTTP Status Code: " + ase.getStatusCode());
        System.out.println("AWS Error Code: " + ase.getErrorCode());
        System.out.println("Error Type: " + ase.getErrorType());
        System.out.println("Request ID: " + ase.getRequestId());
    } catch (AmazonClientException ace) {
        System.out.println("Caught an AmazonClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with S3, "
                + "such as not being able to access the network.");
        System.out.println("Error Message: " + ace.getMessage());
    }
}
From source file:org.pentaho.amazon.client.impl.S3ClientImpl.java
License:Apache License
private String readLogFromS3(String stagingBucketName, String key) {
    Scanner logScanner = null;
    S3ObjectInputStream s3ObjectInputStream = null;
    GZIPInputStream gzipInputStream = null;
    String lineSeparator = System.getProperty("line.separator");
    StringBuilder logContents = new StringBuilder();
    S3Object outObject;
    try {
        if (s3Client.doesObjectExist(stagingBucketName, key)) {
            outObject = s3Client.getObject(stagingBucketName, key);
            s3ObjectInputStream = outObject.getObjectContent();
            gzipInputStream = new GZIPInputStream(s3ObjectInputStream);
            logScanner = new Scanner(gzipInputStream);
            while (logScanner.hasNextLine()) {
                logContents.append(logScanner.nextLine() + lineSeparator);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (logScanner != null) {
                logScanner.close();
            }
            if (s3ObjectInputStream != null) {
                s3ObjectInputStream.close();
            }
            if (gzipInputStream != null) {
                gzipInputStream.close();
            }
        } catch (IOException e) {
            // do nothing
        }
    }
    return logContents.toString();
}
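The same gzipped-log read could also be structured with try-with-resources (Java 7+) so the scanner, GZIP stream, and S3 content stream are closed even on error. This is only a rewrite sketch of the method above, not the project's code: it assumes the same s3Client field and imports, and it propagates IOException instead of printing the stack trace.

// Sketch: try-with-resources variant of the method above (hypothetical alternative formulation).
private String readLogFromS3(String stagingBucketName, String key) throws IOException {
    String lineSeparator = System.getProperty("line.separator");
    StringBuilder logContents = new StringBuilder();
    if (s3Client.doesObjectExist(stagingBucketName, key)) {
        S3Object outObject = s3Client.getObject(stagingBucketName, key);
        try (S3ObjectInputStream s3In = outObject.getObjectContent();
                GZIPInputStream gzipIn = new GZIPInputStream(s3In);
                Scanner logScanner = new Scanner(gzipIn)) {
            // Streams are closed in reverse declaration order when the block exits.
            while (logScanner.hasNextLine()) {
                logContents.append(logScanner.nextLine()).append(lineSeparator);
            }
        }
    }
    return logContents.toString();
}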
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);
        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING";

                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent",
                        //     setupPercent, mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/**
 * Executes a Hive job into the AWS Elastic MapReduce service.
 */
public Result execute(Result result, int arg1) throws KettleException {
    // Setup a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // Create and connect an AWS service.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

        // Prepare staging S3 URL for Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$
            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }
        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }
            // pull down .q file from VFS
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
            IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut);

            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }

            // delete the previous .q file in S3
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }

            // Put .q file in S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }

        // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for
        // Hive EMR job.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create an EMR job flow, start a step to setup Hive and get the job flow ID.
            runJobFlowRequest = createJobFlow();
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }

        // Now EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);

        // Configure a HadoopJarStep.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);

        // Set a logging interval.
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }

        // monitor and log if intended.
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING"; //$NON-NLS-1$

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);

                        if (parentJob.isStopped()) {
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }

                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            logError(Const.getStackTracker(ie));
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pieShare.pieDrive.adapter.s3.S3Adapter.java
// Streams an object's content into the supplied OutputStream in 1 KB chunks, honoring thread interruption.
@Override
public void download(PieDriveFile file, OutputStream stream) throws AdaptorException {
    byte[] buf = new byte[1024];
    int count = 0;
    S3Object object = s3Auth.getClient().getObject(new GetObjectRequest(bucketName, file.getUuid()));
    InputStream objectData = object.getObjectContent();
    try {
        while ((count = objectData.read(buf)) != -1) {
            if (Thread.interrupted()) {
                throw new AdaptorException("Download interrupted.");
            }
            stream.write(buf, 0, count);
        }
        stream.close();
        objectData.close();
        PieLogger.trace(S3Adapter.class, "{} downloaded", file.getUuid());
    } catch (IOException e) {
        throw new AdaptorException(e);
    } catch (AmazonServiceException ase) {
        throw new AdaptorException(ase);
    } catch (AmazonClientException ace) {
        throw new AdaptorException(ace);
    }
}