com.microsoft.azure.management.datalake.store.uploader.SingleSegmentUploader.java Source code

Java tutorial

Introduction

Here is the source code for com.microsoft.azure.management.datalake.store.uploader.SingleSegmentUploader.java

Source

/**
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for
 * license information.
 */
package com.microsoft.azure.management.datalake.store.uploader;

import org.apache.commons.lang3.StringUtils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.text.MessageFormat;

/**
 * Represents an uploader for a single segment of a larger file.
 */
public class SingleSegmentUploader {

    /**
     * The length of the buffers to upload (4MB).
     */
    public static final int BUFFER_LENGTH = 4 * 1024 * 1024;

    /** 4MB is the maximum length of a single extent. So if one record is longer than this,
     * then we will fast fail, since that record will cross extent boundaries.
     */
    public static final int MAX_RECORD_LENGTH = 4 * 1024 * 1024;

    /**
     * During upload retries, this indicates the maximum amount of time, in seconds, that we will wait between retries.
     */
    public static final int MAXIMUM_BACKOFF_WAIT_SECONDS = 32;

    /**
     * The maximum number of times to attempt to upload the buffer.
     */
    public static final int MAX_BUFFER_UPLOAD_ATTEMPT_COUNT = 4;

    private final FrontEndAdapter frontEndAdapter;
    private final UploadSegmentMetadata segmentMetadata;
    private final UploadMetadata metadata;

    /**
     * Indicates whether to use an exponential back-off in case of individual block failures.
     * If 'false' every retry is handled immediately; otherwise an amount of time is waited between retries,
     * as a function of power of 2 (capped at {@link #MAXIMUM_BACKOFF_WAIT_SECONDS}).
     */
    private boolean useBackOffRetryStrategy;

    /**
     * Creates a new uploader for a single segment. The back-off retry strategy is enabled by default.
     *
     * @param segmentNumber The sequence number of the segment (used as an index into the upload's segment array).
     * @param uploadMetadata The metadata for the entire upload.
     * @param frontEnd A pointer to the front end.
     */
    public SingleSegmentUploader(int segmentNumber, UploadMetadata uploadMetadata, FrontEndAdapter frontEnd) {
        metadata = uploadMetadata;
        segmentMetadata = uploadMetadata.getSegments()[segmentNumber];
        frontEndAdapter = frontEnd;
        this.useBackOffRetryStrategy = true;
    }

    /**
     * Gets the retry strategy flag.
     *
     * @return A value indicating whether to use an exponential back-off in case of individual block failures.
     * If set to 'false' every retry is handled immediately; otherwise an amount of time is waited between retries, as a function of power of 2.
     */
    public boolean useBackOffRetryStrategy() {
        return useBackOffRetryStrategy;
    }

    /**
     * Sets the retry strategy flag.
     *
     * @param isEnabled A value indicating whether to use an exponential back-off in case of individual block failures.
     * If set to 'false' every retry is handled immediately; otherwise an amount of time is waited between retries, as a function of power of 2.
     */
    public void setUseBackOffRetryStrategy(boolean isEnabled) {
        useBackOffRetryStrategy = isEnabled;
    }

    /**
     * Uploads the portion of the InputFilePath to the given TargetStreamPath, starting at the given StartOffset.
     * The segment is further divided into blocks of at most {@link #BUFFER_LENGTH} bytes which are uploaded in sequence.
     * Each such block is attempted a certain number of times; if after that it still cannot be uploaded, the entire segment is aborted (in which case no cleanup is performed on the server).
     *
     * @throws Exception if there is any failure during the upload
     */
    public void upload() throws Exception {
        File fileInfo = new File(metadata.getInputFilePath());
        if (!(fileInfo.exists())) {
            throw new FileNotFoundException("Unable to locate input file: " + metadata.getInputFilePath());
        }

        //open up a reader from the input file, seek to the appropriate offset
        try (RandomAccessFile inputStream = openInputStream()) {
            long endPosition = segmentMetadata.getOffset() + segmentMetadata.getLength();
            if (endPosition > fileInfo.length()) {
                throw new IllegalArgumentException("StartOffset+UploadLength is beyond the end of the input file");
            }

            uploadSegmentContents(inputStream, endPosition);

            verifyUploadedStream();
            //any exceptions are (re)thrown to be handled by the caller; we do not handle retries or other recovery techniques here
        }
    }

    /**
     * Verifies the uploaded stream by comparing the remote stream length against the expected segment length.
     * Fetching the remote length is retried up to {@link #MAX_BUFFER_UPLOAD_ATTEMPT_COUNT} times.
     *
     * @throws Exception if the remote length cannot be retrieved, or if it does not match the segment length.
     */
    private void verifyUploadedStream() throws Exception {
        //verify that the remote stream has the length we expected.
        int retryCount = 0;
        long remoteLength = -1;
        while (retryCount < MAX_BUFFER_UPLOAD_ATTEMPT_COUNT) {
            retryCount++;
            try {
                remoteLength = frontEndAdapter.getStreamLength(segmentMetadata.getPath());
                break;
            } catch (Exception ex) {
                //out of attempts: surface the last failure to the caller
                if (retryCount >= MAX_BUFFER_UPLOAD_ATTEMPT_COUNT) {
                    throw ex;
                }

                waitForRetry(retryCount, this.useBackOffRetryStrategy);
            }
        }

        if (segmentMetadata.getLength() != remoteLength) {
            throw new UploadFailedException(MessageFormat.format(
                    "Post-upload stream verification failed: target stream has a length of {0}, expected {1}",
                    remoteLength, segmentMetadata.getLength()));
        }
    }

    /**
     * Uploads the segment contents, one buffer at a time. For text (non-binary) uploads, each buffer is cut at the
     * last record boundary and the remaining bytes are carried over to the front of the next buffer.
     *
     * @param inputStream The input stream, already positioned at the start of the segment.
     * @param endPosition The end position (exclusive) of the segment within the input file.
     * @throws Exception if there is any failure attempting to upload the contents of a single segment.
     */
    private void uploadSegmentContents(RandomAccessFile inputStream, long endPosition) throws Exception {
        long bytesCopiedSoFar = 0; // we start off with a fresh stream

        byte[] buffer = new byte[BUFFER_LENGTH];
        int residualBufferLength = 0; //the number of bytes that remained in the buffer from the last upload (bytes which were not uploaded)

        while (inputStream.getFilePointer() < endPosition) {
            //read a block of data, and keep track of how many bytes are actually read
            int bytesRead = readIntoBuffer(inputStream, buffer, residualBufferLength, endPosition);
            int bufferDataLength = residualBufferLength + bytesRead;

            //determine the cutoff offset for upload - everything before will be uploaded, everything after is residual; (the position of the last record in this buffer)
            int uploadCutoff = bufferDataLength;
            if (!metadata.isBinary()) {
                uploadCutoff = determineUploadCutoffForTextFile(buffer, bufferDataLength, inputStream);
            }

            bytesCopiedSoFar = uploadBuffer(buffer, uploadCutoff, bytesCopiedSoFar);

            residualBufferLength = bufferDataLength - uploadCutoff;
            if (residualBufferLength > 0) {
                //move the remainder of the buffer to the front
                System.arraycopy(buffer, uploadCutoff, buffer, 0, residualBufferLength);
            }
        }

        //make sure we don't leave anything behind
        if (residualBufferLength > 0) {
            uploadBuffer(buffer, residualBufferLength, bytesCopiedSoFar);
        }
    }

    /**
     * Determines the upload cutoff for text file.
     *
     * @param buffer The buffer.
     * @param bufferDataLength length of the buffer data.
     * @param inputStream The input stream (only used to report the approximate offset on failure).
     * @return The index within the buffer which indicates a record boundary cutoff for a single append request for a text file.
     * @throws UploadFailedException indicates that the upload failed for the specified reason.
     * @throws IOException indicates the path is inaccessible or does not exist.
     */
    private int determineUploadCutoffForTextFile(byte[] buffer, int bufferDataLength, RandomAccessFile inputStream)
            throws UploadFailedException, IOException {
        Charset encoding = Charset.forName(metadata.getEncodingName());
        //NOTE: we return an offset, but everywhere else below we treat it as a byte count; in order for that to work, we need to add 1 to the result of FindNewLine.
        int uploadCutoff = StringExtensions.findNewline(buffer, bufferDataLength - 1, bufferDataLength, true,
                encoding, metadata.getDelimiter()) + 1;
        if (uploadCutoff <= 0 && (metadata.getSegmentCount() > 1 || bufferDataLength >= MAX_RECORD_LENGTH)) {
            throw new UploadFailedException(MessageFormat.format(
                    "Found a record that exceeds the maximum allowed record length around offset {0}",
                    inputStream.getFilePointer()));
        }

        //a corner case here is when the newline is 2 chars long, and the first of those lands on the last byte of the buffer. If so, let's try to find another
        //newline inside the buffer, because we might be splitting this wrongly.
        if ((metadata.getDelimiter() == null || StringUtils.isEmpty(metadata.getDelimiter()))
                && uploadCutoff == buffer.length && buffer[buffer.length - 1] == (byte) '\r') {
            int newCutoff = StringExtensions.findNewline(buffer, bufferDataLength - 2, bufferDataLength - 1, true,
                    encoding, metadata.getDelimiter()) + 1;
            if (newCutoff > 0) {
                uploadCutoff = newCutoff;
            }
        }

        return uploadCutoff;
    }

    /**
     * Uploads the buffer, creating the target stream when the offset is 0 and appending to it otherwise.
     * The call is attempted up to {@link #MAX_BUFFER_UPLOAD_ATTEMPT_COUNT} times.
     *
     * @param buffer The buffer.
     * @param bytesToCopy The bytes to copy.
     * @param targetStreamOffset The target stream offset.
     * @return The current index within the target stream after uploading the buffer.
     * @throws Exception Thrown if there is a failure uploading the current buffer.
     */
    private long uploadBuffer(byte[] buffer, int bytesToCopy, long targetStreamOffset) throws Exception {
        //append it to the remote stream
        int attemptCount = 0;
        boolean uploadCompleted = false;
        while (!uploadCompleted && attemptCount < MAX_BUFFER_UPLOAD_ATTEMPT_COUNT) {
            attemptCount++;
            try {
                if (targetStreamOffset == 0) {
                    frontEndAdapter.createStream(segmentMetadata.getPath(), true, buffer, bytesToCopy);
                } else {
                    frontEndAdapter.appendToStream(segmentMetadata.getPath(), buffer, targetStreamOffset,
                            bytesToCopy);
                }

                uploadCompleted = true;
                targetStreamOffset += bytesToCopy;
            } catch (Exception ex) {
                //if we tried more than the number of times we were allowed to, give up and throw the exception
                if (attemptCount >= MAX_BUFFER_UPLOAD_ATTEMPT_COUNT) {
                    throw ex;
                } else {
                    waitForRetry(attemptCount, this.useBackOffRetryStrategy);
                }
            }
        }

        return targetStreamOffset;
    }

    /**
     * Reads the data into the buffer, filling it from {@code bufferOffset} up to either the end of the buffer
     * or the end of the segment, whichever comes first.
     *
     * @param inputStream The stream to read data from.
     * @param buffer The buffer to read data into
     * @param bufferOffset The offset in the buffer to begin pushing data
     * @param streamEndPosition The last point in the stream to read.
     * @return The number of bytes read into the buffer.
     * @throws IOException Thrown if there is an issue accessing the stream or the pointer to the file,
     *                     or if the file ends before the expected number of bytes could be read.
     */
    private int readIntoBuffer(RandomAccessFile inputStream, byte[] buffer, int bufferOffset,
            long streamEndPosition) throws IOException {
        //read a block of data
        int bytesToRead = buffer.length - bufferOffset;
        long bytesLeftInSegment = streamEndPosition - inputStream.getFilePointer();
        if (bytesToRead > bytesLeftInSegment) {
            //last read may be smaller than previous reads; readjust # of bytes to read accordingly
            bytesToRead = (int) bytesLeftInSegment;
        }

        int remainingBytes = bytesToRead;

        while (remainingBytes > 0) {
            //a single read may not return all the bytes we requested, so we need to retry until we filled up the entire buffer
            int bytesRead = inputStream.read(buffer, bufferOffset, remainingBytes);
            if (bytesRead < 0) {
                //the file shrank underneath us; without this check the loop would never terminate correctly
                throw new IOException("Unexpected end of input file while reading segment data; "
                        + remainingBytes + " byte(s) could not be read");
            }

            bufferOffset += bytesRead;
            //track progress independently of the starting offset, which is non-zero when residual bytes are present
            remainingBytes -= bytesRead;
        }

        return bytesToRead;
    }

    /**
     * Enables use of a back off retry strategy, allowing a caller to wait before attempting an action again.
     * The wait is 2^attemptCount seconds, capped at {@link #MAXIMUM_BACKOFF_WAIT_SECONDS}.
     *
     * @param attemptCount The number of attempts that have already been done
     * @param useBackOffRetryStrategy whether to use the back off strategy or not.
     * @throws InterruptedException Thrown if there is an interrupt during the sleep.
     */
    public static void waitForRetry(int attemptCount, boolean useBackOffRetryStrategy) throws InterruptedException {
        if (!useBackOffRetryStrategy) {
            //no need to wait
            return;
        }

        //exponential back-off, never exceeding the documented maximum
        int intervalSeconds = Math.min(MAXIMUM_BACKOFF_WAIT_SECONDS, (int) Math.pow(2, attemptCount));
        Thread.sleep(intervalSeconds * 1000L);
    }

    /**
     * Opens the input stream and positions it at the start of this segment.
     * If validation or seeking fails, the underlying file handle is closed before the exception propagates.
     *
     * @return A {@link RandomAccessFile} stream of the file being uploaded.
     * @throws IOException Thrown if the input stream cannot be opened due to file accessibility or existence.
     */
    private RandomAccessFile openInputStream() throws IOException {
        RandomAccessFile stream = new RandomAccessFile(metadata.getInputFilePath(), "r");
        try {
            if (segmentMetadata.getOffset() >= stream.length()) {
                throw new IllegalArgumentException("StartOffset is beyond the end of the input file");
            }

            // seek is absolute, i.e. always measured from the beginning of the file
            stream.seek(segmentMetadata.getOffset());
            return stream;
        } catch (IOException | RuntimeException ex) {
            //do not leak the file handle when the stream is never handed to the caller
            try {
                stream.close();
            } catch (IOException closeEx) {
                ex.addSuppressed(closeEx);
            }

            throw ex;
        }
    }
}