Java tutorial

Below is the full source of com.splout.db.dnode.Fetcher from Splout SQL. This class is used by the DNodeHandler to fetch data to deploy: it downloads from "file:", HDFS or S3 URIs into a local temporary folder, optionally throttling bandwidth and reporting progress. The complete listing comes first; a few usage sketches follow it.
package com.splout.db.dnode;

/*
 * #%L
 * Splout SQL Server
 * %%
 * Copyright (C) 2012 Datasalt Systems S.L.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */

import com.splout.db.common.SploutConfiguration;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.jets3t.service.S3Service;
import org.jets3t.service.S3ServiceException;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Bucket;
import org.jets3t.service.model.S3Object;
import org.jets3t.service.security.AWSCredentials;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.util.UUID;

/**
 * This Fetcher is used by {@link DNodeHandler} to fetch data to deploy. It handles "file:", HDFS and S3 URIs. For the
 * S3 service it uses the JETS3T library.
 * <p/>
 * The fetcher returns a local File object pointing to a folder that can be used to perform an atomic "mv" operation.
 * The folder contains the DB file.
 */
public class Fetcher {

  private final static Log log = LogFactory.getLog(Fetcher.class);

  private File tempDir;
  private S3Service s3Service;
  private String accessKey;
  private String secretKey;
  private int downloadBufferSize;
  private int bytesPerSecThrottle;
  private long bytesToReportProgress;
  private Configuration hadoopConf;

  public final static int SIZE_UNKNOWN = -1;

  public Fetcher(SploutConfiguration config) {
    tempDir = new File(config.getString(FetcherProperties.TEMP_DIR));
    accessKey = config.getString(FetcherProperties.S3_ACCESS_KEY, null);
    secretKey = config.getString(FetcherProperties.S3_SECRET_KEY, null);
    downloadBufferSize = config.getInt(FetcherProperties.DOWNLOAD_BUFFER);
    bytesPerSecThrottle = config.getInt(FetcherProperties.BYTES_PER_SEC_THROTTLE);
    bytesToReportProgress = config.getLong(FetcherProperties.BYTES_TO_REPORT_PROGRESS);
    String fsName = config.getString(FetcherProperties.HADOOP_FS_NAME);
    hadoopConf = new Configuration();
    if (fsName != null) {
      hadoopConf.set("fs.default.name", fsName);
    }
    log.info("Created " + Fetcher.class + " with tempDir = " + tempDir);
    if (bytesPerSecThrottle > 0) {
      log.info("Throttling at: " + bytesPerSecThrottle + " bytes per sec.");
    } else {
      log.warn("No throttling. Fetched data will be transferred at full speed. This may affect query servicing.");
    }
  }

  private AWSCredentials getCredentials() {
    return new AWSCredentials(accessKey, secretKey);
  }
  /*
   * Fetch a file that is in a Hadoop file system. Return a local File. Interruptible.
   */
  private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
      for (FileStatus fStatus : fS.globStatus(fromPath)) {
        log.info("Copying " + fStatus.getPath() + " to " + toPath);
        long bytesSoFar = 0;

        FSDataInputStream iS = fS.open(fStatus.getPath());
        FSDataOutputStream oS = tofS.create(toPath);

        byte[] buffer = new byte[downloadBufferSize];
        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to be able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            oS.close();
            cleanDirNoExceptions(toDir);
            throw new InterruptedException();
          }
          bytesSoFar += nRead;
          oS.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          // Guarding with a null check: this method may be called without a Reporter.
          if (reporter != null && bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0L;
          }
        }
        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }
        oS.close();
        iS.close();
      }
      return toDir;
    } catch (ClosedByInterruptException e) {
      // This can be thrown by the read() method when the thread is interrupted.
      cleanDirNoExceptions(toDir);
      throw new InterruptedIOException();
    }
  }

  private void cleanDirNoExceptions(File toDir) {
    try {
      FileUtils.deleteDirectory(toDir);
    } catch (IOException ee) {
      log.warn("Impossible to clean up folder " + toDir + " when cancelling a download", ee);
    }
  }

  /**
   * An interface that can be implemented to receive progress about fetching. The Fetcher will use it when provided.
   */
  public static interface Reporter {

    /**
     * This method is called periodically to report progress made. The reported consumed bytes are an incremental
     * measure, not a total measure. In other words, "consumed" is not the total bytes consumed since the beginning
     * of the fetch, but the bytes consumed since the last call to progress.
     */
    public void progress(long consumed);
  }

  /**
   * Implements basic throttling capabilities.
   */
  public static class Throttler {

    double bytesPerSec;
    long lastTime = System.currentTimeMillis();

    public Throttler(double bytesPerSec) {
      this.bytesPerSec = bytesPerSec;
    }

    public void incrementAndThrottle(int bytes) {
      if (bytesPerSec < 1) { // no throttle at all
        return;
      }
      long currentTime = System.currentTimeMillis();
      long timeDiff = currentTime - lastTime;
      if (timeDiff == 0) {
        timeDiff = 1;
      }
      double bytesPerSec = (bytes / (double) timeDiff) * 1000;
      if (bytesPerSec > this.bytesPerSec) {
        // The observed rate exceeds the cap: sleep long enough that the average drops back to the cap.
        double exceededByFactorOf = bytesPerSec / this.bytesPerSec;
        try {
          long mustSleep = (long) ((exceededByFactorOf - 1) * timeDiff);
          Thread.sleep(mustSleep);
        } catch (InterruptedException e) {
          // Restore the interrupt flag so the copy loops above can notice the interruption.
          Thread.currentThread().interrupt();
        }
      }
      lastTime = System.currentTimeMillis();
    }
  }
  /*
   * Fetch a file that is in an S3 file system. Return a local File. It accepts "s3://" and "s3n://" prefixes.
   * Interruptible.
   */
  private File s3Fetch(URI uri, Reporter reporter) throws IOException, InterruptedException {
    String bucketName = uri.getHost();
    String path = uri.getPath();

    UUID uniqueId = UUID.randomUUID();
    File destFolder = new File(tempDir, uniqueId.toString() + "/" + path);
    if (destFolder.exists()) {
      FileUtils.deleteDirectory(destFolder);
    }
    destFolder.mkdirs();

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    boolean done = false;
    try {
      s3Service = new RestS3Service(getCredentials());
      if (s3Service.checkBucketStatus(bucketName) != RestS3Service.BUCKET_STATUS__MY_BUCKET) {
        throw new IOException("Bucket doesn't exist or is already claimed: " + bucketName);
      }

      if (path.startsWith("/")) {
        path = path.substring(1, path.length());
      }

      for (S3Object object : s3Service.listObjects(new S3Bucket(bucketName), path, "")) {
        long bytesSoFar = 0;

        String fileName = path;
        if (path.contains("/")) {
          fileName = path.substring(path.lastIndexOf("/") + 1, path.length());
        }
        File fileDest = new File(destFolder, fileName);
        log.info("Downloading " + object.getKey() + " to " + fileDest + " ...");
        if (fileDest.exists()) {
          fileDest.delete();
        }

        object = s3Service.getObject(new S3Bucket(bucketName), object.getKey());
        InputStream iS = object.getDataInputStream();
        FileOutputStream writer = new FileOutputStream(fileDest);
        byte[] buffer = new byte[downloadBufferSize];
        int nRead;

        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to be able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            writer.close();
            cleanDirNoExceptions(destFolder);
            throw new InterruptedException();
          }
          bytesSoFar += nRead;
          writer.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          // Guarding with a null check: this method may be called without a Reporter.
          if (reporter != null && bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0L;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        writer.close();
        iS.close();
        done = true;
      }

      if (!done) {
        throw new IOException("Bucket is empty! " + bucketName + " path: " + path);
      }
    } catch (S3ServiceException e) {
      throw new IOException(e);
    }

    return destFolder;
  }

  /*
   * Fetch a file that is in a local file system. Return a local File.
   */
  private File fileFetch(File file, Reporter reporter) throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toDir = new File(tempDir, uniqueId.toString() + "/" + file.getName());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    log.info("Copying " + file + " to " + toDir);
    try {
      copyFile(file, new File(toDir, file.getName()), reporter);
    } catch (InterruptedException e) {
      cleanDirNoExceptions(toDir);
      throw e;
    }
    return toDir;
  }

  /**
   * If interrupted, the partially written file is not deleted here; the caller cleans up the parent folder.
   */
  private void copyFile(File sourceFile, File destFile, Reporter reporter) throws IOException, InterruptedException {
    if (!destFile.exists()) {
      destFile.createNewFile();
    }
    FileChannel source = null;
    FileChannel destination = null;

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    FileInputStream iS = null;
    FileOutputStream oS = null;

    try {
      iS = new FileInputStream(sourceFile);
      oS = new FileOutputStream(destFile);
      source = iS.getChannel();
      destination = oS.getChannel();
      long bytesSoFar = 0;
      long reportingBytesSoFar = 0;
      long size = source.size();

      int transferred = 0;

      while (bytesSoFar < size) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          throw new InterruptedException();
        }
        // Casting to int here is safe since we will transfer at most "downloadBufferSize" bytes.
        // Transferring in small chunks on purpose, so that throttling can kick in between chunks.
        transferred = (int) destination.transferFrom(source, bytesSoFar, downloadBufferSize);
        bytesSoFar += transferred;
        reportingBytesSoFar += transferred;
        throttler.incrementAndThrottle(transferred);
        // Guarding with a null check: this method may be called without a Reporter.
        if (reporter != null && reportingBytesSoFar >= bytesToReportProgress) {
          reporter.progress(reportingBytesSoFar);
          reportingBytesSoFar = 0L;
        }
      }

      if (reporter != null) {
        reporter.progress(reportingBytesSoFar);
      }
    } finally {
      if (iS != null) {
        iS.close();
      }
      if (oS != null) {
        oS.close();
      }
      if (source != null) {
        source.close();
      }
      if (destination != null) {
        destination.close();
      }
    }
  }

  /**
   * Use this method to know the total size of a deployment URI. Returns {@link #SIZE_UNKNOWN} when the size can't
   * be determined (currently, for S3 URIs).
   */
  public long sizeOf(String uriStr) throws IOException, URISyntaxException {
    URI uri = new URI(uriStr);
    if (uriStr.startsWith("file:")) {
      File f = new File(uri);
      return f.isDirectory() ? FileUtils.sizeOfDirectory(f) : f.length();
    } else if (uriStr.startsWith("s3")) {
      return SIZE_UNKNOWN; // not yet implemented for S3
    } else {
      // Be flexible as to what we can expect here (e.g. maprfs, etc.): treat anything else as a Hadoop FS URI.
      return FileSystem.get(hadoopConf).getContentSummary(new Path(uriStr)).getLength();
    }
  }

  /**
   * This is the main method that accepts a URI string and delegates the fetching to the appropriate private method.
   */
  public File fetch(String uriStr) throws IOException, URISyntaxException, InterruptedException {
    return fetch(uriStr, null);
  }

  /**
   * This is the main method that accepts a URI string and delegates the fetching to the appropriate private method.
   * Interruptible.
   */
  public File fetch(String uriStr, Reporter reporter) throws IOException, URISyntaxException, InterruptedException {
    URI uri = new URI(uriStr);
    if (uriStr.startsWith("file:")) {
      return fileFetch(new File(uri), reporter);
    } else if (uriStr.startsWith("s3")) {
      return s3Fetch(uri, reporter);
    } else {
      // Be flexible as to what we can expect here (e.g. maprfs, etc.): treat anything else as a Hadoop FS URI.
      return hdfsFetch(new Path(uriStr), reporter);
    }
  }
}
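
Usage sketch. The listing above never shows the class being driven, so here is a minimal, hypothetical example of constructing a Fetcher and fetching a local file while logging progress. How a SploutConfiguration is obtained depends on your deployment; SploutConfiguration.get() is an assumption here, as are the example path and class name. Note that Reporter.progress() receives incremental byte counts, so the callback accumulates them itself.

package com.splout.db.dnode; // placed next to Fetcher for illustration

import java.io.File;

import com.splout.db.common.SploutConfiguration;

public class FetcherUsageExample {

  public static void main(String[] args) throws Exception {
    // Hypothetical: how the configuration is obtained depends on your Splout SQL setup.
    SploutConfiguration config = SploutConfiguration.get();
    Fetcher fetcher = new Fetcher(config);

    // sizeOf() returns SIZE_UNKNOWN (-1) for S3 URIs; file: and Hadoop URIs report real sizes.
    final long totalSize = fetcher.sizeOf("file:///tmp/partition.db");

    File localFolder = fetcher.fetch("file:///tmp/partition.db", new Fetcher.Reporter() {
      private long consumedSoFar = 0;

      @Override
      public void progress(long consumed) {
        // "consumed" is incremental: bytes since the last progress() call, not a running total.
        consumedSoFar += consumed;
        System.out.println("Fetched " + consumedSoFar + " of " + totalSize + " bytes");
      }
    });

    // The returned folder contains the DB file and can now be atomically "mv"-ed into place.
    System.out.println("Fetched into: " + localFolder);
  }
}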
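
The throttling arithmetic deserves a closer look. incrementAndThrottle() computes the observed rate over the last interval as bytes / timeDiff * 1000; if that exceeds the configured cap by a factor f, sleeping (f - 1) * timeDiff stretches the interval just enough to bring the average back down to the cap. A small standalone sketch (cap and chunk size are illustrative) that should take roughly ten seconds to push 10 MB through a 1 MB/s throttle:

package com.splout.db.dnode; // placed next to Fetcher for illustration

public class ThrottlerDemo {

  public static void main(String[] args) {
    // Cap at 1 MB/s; the chunk size mimics a download buffer.
    Fetcher.Throttler throttler = new Fetcher.Throttler(1024 * 1024);

    int chunk = 64 * 1024; // pretend each iteration copies 64 KB
    long copied = 0;
    long start = System.currentTimeMillis();

    for (int i = 0; i < 160; i++) { // 160 * 64 KB = 10 MB in total
      copied += chunk;
      throttler.incrementAndThrottle(chunk);
    }

    long elapsedMs = System.currentTimeMillis() - start;
    // Expect roughly 10,000 ms: 10 MB at a 1 MB/s cap.
    System.out.println("Copied " + copied + " bytes in " + elapsedMs + " ms");
  }
}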
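
Finally, the "Interruptible" notes in the comments matter in practice: every copy loop polls Thread.interrupted() between buffer reads, cleans up its partial download folder, and surfaces an InterruptedException (or an InterruptedIOException, when an HDFS read is cut short via ClosedByInterruptException). A sketch of cancelling a fetch from another thread; the URI and the configuration lookup are illustrative:

package com.splout.db.dnode; // placed next to Fetcher for illustration

import java.io.InterruptedIOException;

import com.splout.db.common.SploutConfiguration;

public class CancellableFetchExample {

  public static void main(String[] args) throws Exception {
    final Fetcher fetcher = new Fetcher(SploutConfiguration.get()); // hypothetical config lookup

    Thread fetchThread = new Thread(new Runnable() {
      public void run() {
        try {
          fetcher.fetch("hdfs://namenode/deployments/partition.db"); // illustrative URI
        } catch (InterruptedIOException e) {
          System.out.println("Fetch cancelled (HDFS read interrupted).");
        } catch (InterruptedException e) {
          // The Fetcher has already removed its partial download folder at this point.
          System.out.println("Fetch cancelled.");
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    });

    fetchThread.start();
    Thread.sleep(2000);       // let the download run for a while...
    fetchThread.interrupt();  // ...then cancel; the copy loop notices on its next buffer read
    fetchThread.join();
  }
}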