voldemort.server.protocol.hadoop.RestHadoopFetcher.java Source code

Introduction

Here is the source code for voldemort.server.protocol.hadoop.RestHadoopFetcher.java, a FileFetcher implementation that copies read-only store files out of HDFS over a REST (webhdfs) interface, with optional bandwidth throttling, automatic retries, and checksum verification.

Source

/*
 * Copyright 2008-2013 LinkedIn, Inc
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package voldemort.server.protocol.hadoop;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicInteger;

import javax.management.ObjectName;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

import voldemort.VoldemortException;
import voldemort.annotations.jmx.JmxGetter;
import voldemort.server.VoldemortConfig;
import voldemort.server.protocol.admin.AsyncOperationStatus;
import voldemort.store.readonly.FileFetcher;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.checksum.CheckSum;
import voldemort.store.readonly.checksum.CheckSum.CheckSumType;
import voldemort.utils.ByteUtils;
import voldemort.utils.DynamicEventThrottler;
import voldemort.utils.DynamicThrottleLimit;
import voldemort.utils.EventThrottler;
import voldemort.utils.JmxUtils;
import voldemort.utils.Time;
import voldemort.utils.Utils;

import com.linkedin.tusk.RestFSException;
import com.linkedin.tusk.RestFileStatus;
import com.linkedin.tusk.RestFileSystem;

public class RestHadoopFetcher implements FileFetcher {

    private static final Logger logger = Logger.getLogger(RestHadoopFetcher.class);
    private static final AtomicInteger copyCount = new AtomicInteger(0);

    private final Long maxBytesPerSecond, reportingIntervalBytes;
    private EventThrottler throttler = null;
    private DynamicThrottleLimit globalThrottleLimit = null;
    private final int bufferSize;
    private AsyncOperationStatus status;
    private long minBytesPerSecond = 0;
    private long retryDelayMs = 0;
    private int maxAttempts = 0;

    public RestHadoopFetcher(VoldemortConfig config) {
        this(null, null, config.getReadOnlyFetcherReportingIntervalBytes(), config.getFetcherBufferSize(),
                config.getReadOnlyFetcherMinBytesPerSecond(), config.getReadOnlyFetchRetryCount(),
                config.getReadOnlyFetchRetryDelayMs());

        logger.info("Created Rest-based hdfs fetcher with no dynamic throttler, buffer size " + bufferSize
                + ", reporting interval bytes " + reportingIntervalBytes);
    }

    public RestHadoopFetcher(VoldemortConfig config, DynamicThrottleLimit dynThrottleLimit) {
        this(dynThrottleLimit, null, config.getReadOnlyFetcherReportingIntervalBytes(),
                config.getFetcherBufferSize(), config.getReadOnlyFetcherMinBytesPerSecond(),
                config.getReadOnlyFetchRetryCount(), config.getReadOnlyFetchRetryDelayMs());

        logger.info("Created Rest-based hdfs fetcher with throttle rate " + dynThrottleLimit.getRate()
                + ", buffer size " + bufferSize + ", reporting interval bytes " + reportingIntervalBytes);
    }

    // Test-only constructor
    public RestHadoopFetcher() {
        this((Long) null, VoldemortConfig.REPORTING_INTERVAL_BYTES, VoldemortConfig.DEFAULT_BUFFER_SIZE);
    }

    // Test-only constructor
    public RestHadoopFetcher(Long maxBytesPerSecond, Long reportingIntervalBytes, int bufferSize) {
        this(null, maxBytesPerSecond, reportingIntervalBytes, bufferSize, 0, 3, 1000);
    }

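    /**
     * Primary constructor. If maxBytesPerSecond is set, a fixed-rate throttler
     * is used; otherwise, if a dynamic throttle limit with a non-zero rate is
     * supplied, fetch bandwidth is shared across concurrent jobs via a
     * DynamicEventThrottler.
     */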
    public RestHadoopFetcher(DynamicThrottleLimit dynThrottleLimit, Long maxBytesPerSecond,
            Long reportingIntervalBytes, int bufferSize, long minBytesPerSecond, int retryCount,
            long retryDelayMs) {
        if (maxBytesPerSecond != null) {
            this.maxBytesPerSecond = maxBytesPerSecond;
            this.throttler = new EventThrottler(this.maxBytesPerSecond);
        } else if (dynThrottleLimit != null && dynThrottleLimit.getRate() != 0) {
            this.maxBytesPerSecond = dynThrottleLimit.getRate();
            this.throttler = new DynamicEventThrottler(dynThrottleLimit);
            this.globalThrottleLimit = dynThrottleLimit;
            logger.info(
                    "Initializing Dynamic Event throttler with rate : " + this.maxBytesPerSecond + " bytes / sec");
        } else {
            this.maxBytesPerSecond = null;
        }
        this.reportingIntervalBytes = Utils.notNull(reportingIntervalBytes);
        this.bufferSize = bufferSize;
        this.status = null;
        this.minBytesPerSecond = minBytesPerSecond;
        this.maxAttempts = retryCount + 1;
        this.retryDelayMs = retryDelayMs;
    }

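    /**
     * Fetches the given source URL (a webhdfs directory) into destinationFile,
     * registering a JMX MBean to report per-copy progress. Returns the local
     * destination on success, or null if checksum verification fails.
     */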
    @Override
    public File fetch(String sourceFileUrl, String destinationFile) throws IOException {
        // add job to the throttler
        addThrottledJob();
        ObjectName jmxName = null;
        try {
            // authentication
            RestHadoopAuth.loginSecuredHdfs();

            // instantiate RestFS
            // Replacing "webhdfs" with "http" here is a stopgap: the BnP job
            // still constructs URLs with the webhdfs scheme. This code must be
            // released to all servers before BnP is modified; otherwise,
            // servers would immediately use http without going through the
            // RESTHdfsClient and the operations would fail.
            sourceFileUrl = sourceFileUrl.replace("webhdfs", "http");
            URL sourceUrl = new URL(sourceFileUrl);
            RestFileSystem rfs = new RestFileSystem(
                    sourceUrl.getProtocol() + "://" + sourceUrl.getHost() + ":" + sourceUrl.getPort());
            String fullyQualifiedFileName = sourceUrl.getFile();
            CopyStats stats = new CopyStats(fullyQualifiedFileName, sizeOfPath(rfs, fullyQualifiedFileName));
            jmxName = JmxUtils.registerMbean("hdfs-copy-" + copyCount.getAndIncrement(), stats);
            File destination = new File(destinationFile);

            if (destination.exists()) {
                throw new VoldemortException(
                        "Version directory " + destination.getAbsolutePath() + " already exists");
            }

            logger.info("Starting fetch for : " + sourceFileUrl);
            boolean result = fetch(rfs, fullyQualifiedFileName, destination, stats);
            logger.info("Completed fetch : " + sourceFileUrl);

            if (result) {
                return destination;
            } else {
                return null;
            }
        } catch (RestFSException rfse) {
            logger.error("Encountered exception while accessing hadoop via RestHdfsClient", rfse);
            throw new VoldemortException("Error while accessing hadoop via RestHdfsClient : " + rfse);
        } catch (Throwable te) {
            logger.error("Error thrown while trying to get Hadoop filesystem", te);
            throw new VoldemortException("Error thrown while trying to get Hadoop filesystem : " + te);
        } finally {
            removeThrottledJob();
            if (jmxName != null)
                JmxUtils.unregisterMbean(jmxName);
        }
    }

    @Override
    public void setAsyncOperationStatus(AsyncOperationStatus status) {
        this.status = status;
    }

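    /**
     * Registers this fetch with the shared dynamic throttle limit, failing
     * fast if admitting another job would push the per-job rate below the
     * configured minimum bytes per second.
     */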
    private void addThrottledJob() throws VoldemortException {
        if (this.globalThrottleLimit != null) {
            if (this.globalThrottleLimit.getSpeculativeRate() < this.minBytesPerSecond)
                throw new VoldemortException("Too many push jobs.");
            this.globalThrottleLimit.incrementNumJobs();
        }
    }

    private void removeThrottledJob() {
        if (this.globalThrottleLimit != null) {
            this.globalThrottleLimit.decrementNumJobs();
        }
    }

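    /**
     * Recursively computes the total size, in bytes, of all files under the
     * given path.
     */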
    private long sizeOfPath(RestFileSystem rfs, String path) throws IOException, RestFSException {
        long size = 0;
        RestFileStatus[] statuses = rfs.listStatus(path).toArray();
        if (statuses != null) {
            for (RestFileStatus status : statuses) {
                if (status.isDir())
                    size += sizeOfPath(rfs, path + File.separator + status.getPathSuffix());
                else
                    size += status.getLength();
            }
        }
        return size;
    }

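    /** Sleeps for the configured delay between retry attempts. */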
    private void sleepForRetryDelayMs() {
        if (retryDelayMs > 0) {
            try {
                Thread.sleep(retryDelayMs);
            } catch (InterruptedException ie) {
                logger.error("Fetcher interrupted while waiting to retry", ie);
            }
        }
    }

    /**
     * Copies a file from the given filesystem, computing a checksum of type
     * 'checkSumType' over its contents. If an error occurs during the copy,
     * it is retried until maxAttempts attempts have been made.
     * 
     * @param rfs RestFilesystem used to copy the file
     * @param source Source path of the file to copy
     * @param dest Destination path of the file on the local machine
     * @param stats Stats for measuring the transfer progress
     * @param checkSumType Type of the Checksum to be computed for this file
     * @return A Checksum (generator) of type checkSumType which contains the
     *         computed checksum of the copied file
     * @throws IOException
     */
    private CheckSum copyFileWithCheckSum(RestFileSystem rfs, String source, File dest, CopyStats stats,
            CheckSumType checkSumType) throws Throwable {
        CheckSum fileCheckSumGenerator = null;
        logger.info("Starting copy of " + source + " to " + dest);
        BufferedInputStream input = null;
        OutputStream output = null;

        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            boolean success = true;
            long totalBytesRead = 0;
            boolean fsOpened = false;

            try {
                // Create a per file checksum generator
                if (checkSumType != null) {
                    fileCheckSumGenerator = CheckSum.getInstance(checkSumType);
                }

                logger.info("Attempt " + attempt + " at copy of " + source + " to " + dest);

                input = new BufferedInputStream(rfs.openFile(source).getInputStream());
                fsOpened = true;

                output = new BufferedOutputStream(new FileOutputStream(dest));
                byte[] buffer = new byte[bufferSize];
                while (true) {
                    int read = input.read(buffer);
                    if (read < 0) {
                        break;
                    } else {
                        output.write(buffer, 0, read);
                        totalBytesRead += read;
                    }

                    // Update the per file checksum
                    if (fileCheckSumGenerator != null) {
                        fileCheckSumGenerator.update(buffer, 0, read);
                    }

                    // Check if we need to throttle the fetch
                    if (throttler != null) {
                        throttler.maybeThrottle(read);
                    }

                    stats.recordBytes(read);
                    if (stats.getBytesSinceLastReport() > reportingIntervalBytes) {
                        NumberFormat format = NumberFormat.getNumberInstance();
                        format.setMaximumFractionDigits(2);
                        logger.info(stats.getTotalBytesCopied() / (1024 * 1024) + " MB copied at "
                                + format.format(stats.getBytesPerSecond() / (1024 * 1024)) + " MB/sec - "
                                + format.format(stats.getPercentCopied()) + " % complete, destination:" + dest);
                        if (this.status != null) {
                            this.status.setStatus(stats.getTotalBytesCopied() / (1024 * 1024) + " MB copied at "
                                    + format.format(stats.getBytesPerSecond() / (1024 * 1024)) + " MB/sec - "
                                    + format.format(stats.getPercentCopied()) + " % complete, destination:" + dest);
                        }
                        stats.reset();
                    }
                }
                // at this point, we are done!
                logger.info("Completed copy of " + source + " to " + dest);
            } catch (Throwable te) {
                success = false;
                if (!fsOpened) {
                    logger.error("Error while opening the file stream to " + source, te);
                } else {
                    logger.error("Error while copying file " + source + " after " + totalBytesRead + " bytes.", te);
                }
                if (te.getCause() != null) {
                    logger.error("Cause of error ", te.getCause());
                }

                if (attempt < maxAttempts - 1) {
                    logger.info("Will retry copying after " + retryDelayMs + " ms");
                    sleepForRetryDelayMs();
                } else {
                    logger.info("Fetcher giving up copy after " + maxAttempts + " attempts");
                    throw te;
                }
            } finally {
                IOUtils.closeQuietly(output);
                IOUtils.closeQuietly(input);
                if (success) {
                    break;
                }
            }
            logger.info("Completed copy of " + source + " to " + dest);
        }
        return fileCheckSumGenerator;
    }

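    /**
     * Fetches every file under the source directory into dest, then verifies
     * the transfer with a "checksum of checksums": each file's checksum is
     * folded into a global generator, and the final digest is compared against
     * the value recorded in the .metadata file.
     */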
    private boolean fetch(RestFileSystem rfs, String source, File dest, CopyStats stats)
            throws Throwable, RestFSException {
        boolean fetchSucceed = false;

        if (rfs.fileStatus(source).isDir()) {
            Utils.mkdirs(dest);
            RestFileStatus[] statuses = rfs.listStatus(source).toArray();
            if (statuses != null && statuses.length > 0) {
                // sort the files so that index files come last. Maybe
                // this will help keep them cached until the swap
                Arrays.sort(statuses, new IndexFileLastComparator());
                byte[] origCheckSum = null;
                CheckSumType checkSumType = CheckSumType.NONE;

                // Do a checksum of checksum - Similar to HDFS
                CheckSum checkSumGenerator = null;
                CheckSum fileCheckSumGenerator = null;

                for (RestFileStatus status : statuses) {
                    String fileNameWithAbsolutePath = status.getAbsolutePath();
                    String shortFileName = status.getPathSuffix();
                    logger.info("fetching file: " + fileNameWithAbsolutePath);
                    // Kept for backwards compatibility
                    if (shortFileName.contains("checkSum.txt")) {

                        logger.warn("Found checksum file in old format: " + shortFileName);

                    } else if (shortFileName.contains(".metadata")) {

                        logger.debug("Reading .metadata");
                        // Read metadata into local file
                        File copyLocation = new File(dest, shortFileName);
                        copyFileWithCheckSum(rfs, fileNameWithAbsolutePath, copyLocation, stats, null);

                        // Open the local file to initialize checksum
                        ReadOnlyStorageMetadata metadata;
                        try {
                            metadata = new ReadOnlyStorageMetadata(copyLocation);
                        } catch (IOException e) {
                            logger.error("Error reading metadata file ", e);
                            throw new VoldemortException(e);
                        }

                        // Read checksum
                        String checkSumTypeString = (String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE);
                        String checkSumString = (String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM);

                        if (checkSumTypeString != null && checkSumString != null) {

                            try {
                                origCheckSum = Hex.decodeHex(checkSumString.toCharArray());
                            } catch (DecoderException e) {
                                logger.error("Exception reading checksum file. Ignoring checksum ", e);
                                continue;
                            }

                            logger.debug("Checksum from .metadata " + new String(Hex.encodeHex(origCheckSum)));

                            // Define the Global checksum generator
                            checkSumType = CheckSum.fromString(checkSumTypeString);
                            checkSumGenerator = CheckSum.getInstance(checkSumType);
                        }

                    } else if (!shortFileName.startsWith(".")) {

                        // Read other (.data , .index files)
                        File copyLocation = new File(dest, shortFileName);
                        fileCheckSumGenerator = copyFileWithCheckSum(rfs, fileNameWithAbsolutePath, copyLocation,
                                stats, checkSumType);

                        if (fileCheckSumGenerator != null && checkSumGenerator != null) {
                            byte[] checkSum = fileCheckSumGenerator.getCheckSum();
                            if (logger.isDebugEnabled()) {
                                logger.debug("Checksum for " + shortFileName + " - "
                                        + new String(Hex.encodeHex(checkSum)));
                            }
                            checkSumGenerator.update(checkSum);
                        }
                    }

                }

                logger.info(
                        "Completed reading all files from " + source + " to " + dest.getAbsolutePath());
                // Check checksum
                if (checkSumType != CheckSumType.NONE) {
                    byte[] newCheckSum = checkSumGenerator.getCheckSum();
                    boolean checkSumComparison = (ByteUtils.compare(newCheckSum, origCheckSum) == 0);

                    logger.info("Checksum generated from streaming - " + new String(Hex.encodeHex(newCheckSum)));
                    logger.info("Checksum on file - " + new String(Hex.encodeHex(origCheckSum)));
                    logger.info("Check-sum verification - " + checkSumComparison);

                    fetchSucceed = checkSumComparison;
                } else {
                    logger.info("No check-sum verification required");
                    fetchSucceed = true;
                }
            } else {
                logger.error("No files found under the source location: " + source);
            }
        } else {
            logger.error("Source " + source + " should be a directory");
        }
        return fetchSucceed;
    }

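    /**
     * Tracks transfer progress (bytes copied, rate, percent complete) and
     * exposes it via JMX.
     */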
    public static class CopyStats {

        private final String fileName;
        private volatile long bytesSinceLastReport;
        private volatile long totalBytesCopied;
        private volatile long lastReportNs;
        private volatile long totalBytes;

        public CopyStats(String fileName, long totalBytes) {
            this.fileName = fileName;
            this.totalBytesCopied = 0L;
            this.bytesSinceLastReport = 0L;
            this.totalBytes = totalBytes;
            this.lastReportNs = System.nanoTime();
        }

        public void recordBytes(long bytes) {
            this.totalBytesCopied += bytes;
            this.bytesSinceLastReport += bytes;
        }

        public void reset() {
            this.bytesSinceLastReport = 0;
            this.lastReportNs = System.nanoTime();
        }

        public long getBytesSinceLastReport() {
            return bytesSinceLastReport;
        }

        public double getPercentCopied() {
            if (totalBytes == 0) {
                return 0.0;
            } else {
                return (double) (totalBytesCopied * 100) / (double) totalBytes;
            }
        }

        @JmxGetter(name = "totalBytesCopied", description = "The total number of bytes copied so far in this transfer.")
        public long getTotalBytesCopied() {
            return totalBytesCopied;
        }

        @JmxGetter(name = "bytesPerSecond", description = "The rate of the transfer in bytes/second.")
        public double getBytesPerSecond() {
            double elapsedSecs = (System.nanoTime() - lastReportNs) / (double) Time.NS_PER_SECOND;
            return bytesSinceLastReport / elapsedSecs;
        }

        @JmxGetter(name = "filename", description = "The file path being copied.")
        public String getFilename() {
            return this.fileName;
        }
    }

    /**
     * A comparator that sorts index files last. This is a heuristic for
     * retaining the index file in page cache until the swap occurs
     * 
     */
    public static class IndexFileLastComparator implements Comparator<RestFileStatus> {

        @Override
        public int compare(RestFileStatus fs1, RestFileStatus fs2) {
            // directories before files
            if (fs1.isDir())
                return fs2.isDir() ? 0 : -1;
            if (fs2.isDir())
                return 1;

            String f1 = fs1.getPathSuffix(), f2 = fs2.getPathSuffix();

            // All metadata files given priority
            if (f1.endsWith("metadata"))
                return -1;
            if (f2.endsWith("metadata"))
                return 1;

            // if both are the same type, compare lexicographically
            if ((f1.endsWith(".index") && f2.endsWith(".index"))
                    || (f1.endsWith(".data") && f2.endsWith(".data"))) {
                return f1.compareToIgnoreCase(f2);
            }

            if (f1.endsWith(".index")) {
                return 1;
            } else {
                return -1;
            }
        }
    }

    /*
     * Main method for testing fetching
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 5)
            Utils.croak("USAGE: java " + RestHadoopFetcher.class.getName()
                    + " [url] [keytab location] [kerberos username] [kerberos realm] [kerberos kdc]");

        long MAX_BYTES_PER_SECOND = 1024 * 1024 * 1024;
        long REPORTING_INTERVAL_BYTES = VoldemortConfig.REPORTING_INTERVAL_BYTES;
        int BUFFER_SIZE = VoldemortConfig.DEFAULT_BUFFER_SIZE;

        String url = args[0];
        String keytabLocation = args[1];
        String kerberosUser = args[2];
        String realm = args[3];
        String kdc = args[4];

        // login
        RestHadoopAuth restAuth = new RestHadoopAuth(realm, kdc, kerberosUser, keytabLocation);
        restAuth.start();

        RestHadoopFetcher fetcher = new RestHadoopFetcher(null, MAX_BYTES_PER_SECOND, REPORTING_INTERVAL_BYTES,
                BUFFER_SIZE, 0, 3, 1000);

        // start file fetching
        long start = System.currentTimeMillis();
        File location = fetcher.fetch(url, System.getProperty("user.home") + File.separator + start);

        // complete file fetching; print stats
        if (location == null)
            Utils.croak("Fetch failed for " + url);
        long size = location.length();
        double rate = size * Time.MS_PER_SECOND / (double) (System.currentTimeMillis() - start);
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMaximumFractionDigits(2);
        System.out.println(
                "Fetch to " + location + " completed: " + nf.format(rate / (1024.0 * 1024.0)) + " MB/sec.");

        // logout
        restAuth.stop();
    }
}
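
Usage

The following is a minimal sketch of driving the fetcher programmatically, using only the constructors and methods shown in the source above. The Kerberos realm, KDC, principal, keytab path, source URL, and destination directory are placeholder values, not settings from this repository; the test-only constructor is used for brevity.

import java.io.File;

import voldemort.server.protocol.hadoop.RestHadoopAuth;
import voldemort.server.protocol.hadoop.RestHadoopFetcher;

public class RestHadoopFetcherExample {

    public static void main(String[] args) throws Exception {
        // Placeholder Kerberos settings; substitute real values.
        RestHadoopAuth auth = new RestHadoopAuth("EXAMPLE.COM",
                                                 "kdc.example.com",
                                                 "voldemort-user",
                                                 "/etc/security/voldemort.keytab");
        auth.start();
        try {
            // Test-only constructor: no throttling, default reporting
            // interval and buffer size.
            RestHadoopFetcher fetcher = new RestHadoopFetcher();

            // The destination directory must not already exist; fetch()
            // returns null if checksum verification fails.
            File fetched = fetcher.fetch(
                    "webhdfs://namenode.example.com:50070/stores/my-store/version-1",
                    "/tmp/my-store/version-1");
            if (fetched == null) {
                System.err.println("Fetch failed: checksum mismatch or empty source");
            } else {
                System.out.println("Fetched to " + fetched.getAbsolutePath());
            }
        } finally {
            auth.stop();
        }
    }
}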