Java tutorial
/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tap.hadoop; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.security.DigestOutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import cascading.util.S3Util; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Progressable; import org.apache.log4j.Logger; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.model.S3Bucket; import org.jets3t.service.model.S3Object; /** * Class S3HttpFileSystem provides a basic {@link FileSystem} for reading and writing remote S3 data. * <p/> * To use this FileSystem, reference your S3 resources with the following URI pattern:<br/> * s3tp://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@bucketname/key * <p/> * Optionally these configuration/system properties can be set, instead of stuffing values into the URL authority: * "fs.s3tp.awsAccessKeyId" and "fs.s3tp.awsSecretAccessKey". * * @deprecated */ @Deprecated public class S3HttpFileSystem extends StreamedFileSystem { /** Field LOG */ private static final Logger LOG = Logger.getLogger(S3HttpFileSystem.class); public static final String S3TP_SCHEME = "s3tp"; private URI uri; private RestS3Service s3Service; private S3Bucket s3Bucket; @Override public void initialize(URI uri, Configuration conf) throws IOException { LOG.warn( "the S3HttpFileSystem (s3tp://) is deprecated, please use the Hadoop NativeS3NativeFileSystem (s3n://)"); setConf(conf); String key = conf.get("fs.s3tp.awsAccessKeyId", System.getProperty("fs.s3tp.awsAccessKeyId")); String secret = conf.get("fs.s3tp.awsSecretAccessKey", System.getProperty("fs.s3tp.awsSecretAccessKey")); this.s3Service = S3Util.getS3Service(uri, key, secret); this.s3Bucket = S3Util.getS3Bucket(uri); this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); } @Override public URI getUri() { return uri; } @Override public FSDataOutputStream create(final Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { if (!overwrite && exists(path)) throw new IOException("file already exists: " + path); if (LOG.isDebugEnabled()) LOG.debug("creating file: " + path); final ByteArrayOutputStream stream = new ByteArrayOutputStream(); final DigestOutputStream digestStream = new DigestOutputStream(stream, getMD5Digest()); return new FSDataOutputStream(digestStream, null) { @Override public void close() throws IOException { super.close(); S3Object object = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.CREATE_OBJECT); object.setContentType("text/plain"); // todo use 'binary/octet-stream' object.setMd5Hash(digestStream.getMessageDigest().digest()); // todo buffer to disk instead byte[] bytes = stream.toByteArray(); object.setDataInputStream(new ByteArrayInputStream(bytes)); object.setContentLength(bytes.length); if (LOG.isDebugEnabled()) LOG.debug("putting file: " + path); S3Util.putObject(s3Service, s3Bucket, object); } }; } @Override public FSDataInputStream open(Path path, int i) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("opening file: " + path); S3Object object = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.OBJECT); FSDigestInputStream inputStream = new FSDigestInputStream(S3Util.getObjectInputStream(object), getMD5SumFor(getConf(), path)); // ctor requires Seekable or PositionedReadable stream return new FSDataInputStream(inputStream); } @Override public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("making dirs for: " + path); S3Object directory = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.DETAILS); if (directory != null && S3Util.isDirectory(directory)) return true; directory = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.CREATE_DIR); S3Util.putObject(s3Service, s3Bucket, directory); return true; } @Deprecated @Override public boolean delete(Path path) throws IOException { return delete(path, true); } @Override public boolean delete(Path path, boolean recursive) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("deleting file: " + path); return S3Util.deleteObject(s3Service, s3Bucket, path); } @Override public boolean exists(Path path) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("testing file: " + path); return S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.DETAILS) != null; } @Override public FileStatus[] listStatus(Path path) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("listing path: " + path); // todo: content-type not returned on list S3Object[] objects = S3Util.listObjects(s3Service, s3Bucket, path); // if an object is an exact match, and is a file, just return the file status String key = S3Util.getKeyFrom(path); for (S3Object object : objects) { if (object.getKey().equals(key) && !S3Util.isDirectory(object)) return new FileStatus[] { makeStatus(object) }; } FileStatus[] status = new FileStatus[objects.length]; for (int i = 0; i < objects.length; i++) status[i] = makeStatus(objects[i]); return status; } @Override public FileStatus getFileStatus(Path path) throws IOException { S3Object object = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.DETAILS); if (LOG.isDebugEnabled()) LOG.debug("returning status for: " + path); if (object == null) throw new FileNotFoundException("file does not exist: " + path); return makeStatus(object); } private StreamedFileStatus makeStatus(S3Object object) { return new StreamedFileStatus(object.getContentLength(), S3Util.isDirectory(object), 1, getDefaultBlockSize(), object.getLastModifiedDate().getTime(), new Path(uri.toString() + "/", object.getKey()), object.getMd5HashAsHex()); } private MessageDigest getMD5Digest() throws IOException { try { return MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException exception) { throw new IOException("digest not found: " + exception.getMessage()); } } }