com.dnanexus.DXFile.java Source code

Java tutorial

Introduction

Here is the source code for com.dnanexus.DXFile.java

Source

// Copyright (C) 2013-2016 DNAnexus, Inc.
//
// This file is part of dx-toolkit (DNAnexus platform client libraries).
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain a
// copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

package com.dnanexus;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Map;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.NoHttpResponseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;

import com.dnanexus.DXHTTPRequest.RetryStrategy;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

/**
 * A file (an opaque sequence of bytes).
 */
public class DXFile extends DXDataObject {

    /**
     * Builder class for creating a new {@code DXFile} object. To obtain an instance, call
     * {@link DXFile#newFile()}.
     *
     * <p>
     * Besides the settings inherited from {@link DXDataObject.Builder}, a media type and an
     * initial payload may be supplied. Each of those may be set at most once per builder.
     * </p>
     */
    public static class Builder extends DXDataObject.Builder<Builder, DXFile> {
        // Internet Media Type to send with the file/new request; null if not set
        private String media;
        // Data to upload right after the file object is created; null if not set
        private InputStream uploadData;

        private Builder() {
            super();
        }

        private Builder(DXEnvironment env) {
            super(env);
        }

        /**
         * Creates the file and, if data was supplied via one of the {@code upload} methods,
         * uploads that data to the newly created file.
         *
         * @return a {@code DXFile} object corresponding to the newly created object
         *
         * @throws RuntimeException wrapping the {@link IOException} if uploading the data fails
         */
        @Override
        public DXFile build() {
            DXFile file = new DXFile(
                    DXAPI.fileNew(this.buildRequestHash(), ObjectNewResponse.class, this.env).getId(), this.project,
                    this.env, null);

            if (uploadData != null) {
                try {
                    file.upload(uploadData);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }

            return file;
        }

        /**
         * Use this method to test the JSON hash created by a particular builder call without
         * actually executing the request.
         *
         * @return a JsonNode
         */
        @VisibleForTesting
        JsonNode buildRequestHash() {
            checkAndFixParameters();
            return MAPPER.valueToTree(new FileNewRequest(this));
        }

        /*
         * (non-Javadoc)
         *
         * @see com.dnanexus.DXDataObject.Builder#getThisInstance()
         */
        @Override
        protected Builder getThisInstance() {
            return this;
        }

        /**
         * Sets the Internet Media Type of the file to be created.
         *
         * @param mediaType Internet Media Type
         *
         * @return the same {@code Builder} object
         *
         * @throws IllegalStateException if the media type was already set on this builder
         * @throws NullPointerException if {@code mediaType} is null
         */
        public Builder setMediaType(String mediaType) {
            Preconditions.checkState(this.media == null, "Cannot call setMediaType more than once");
            this.media = Preconditions.checkNotNull(mediaType, "mediaType may not be null");
            return getThisInstance();
        }

        /**
         * Uploads the data in the specified byte array to the file to be created.
         *
         * @param data data to be uploaded
         *
         * @return the same {@code Builder} object
         *
         * @throws IllegalStateException if upload data was already set on this builder
         * @throws NullPointerException if {@code data} is null
         */
        public Builder upload(byte[] data) {
            Preconditions.checkNotNull(data, "data may not be null");
            InputStream dataStream = new ByteArrayInputStream(data);
            return this.upload(dataStream);
        }

        /**
         * Uploads the data in the specified stream to the file to be created.
         *
         * @param data stream containing data to be uploaded
         *
         * @return the same {@code Builder} object
         *
         * @throws IllegalStateException if upload data was already set on this builder
         * @throws NullPointerException if {@code data} is null
         */
        public Builder upload(InputStream data) {
            // checkState (not checkNotNull): the argument is a boolean condition, which would
            // always be a non-null boxed value and therefore never trip a null check.
            Preconditions.checkState(this.uploadData == null, "Cannot call upload more than once");
            this.uploadData = Preconditions.checkNotNull(data, "data may not be null");
            return getThisInstance();
        }
    }

    /**
     * Metadata for a file, populated by deserializing the output of a describe call.
     */
    public static class Describe extends DXDataObject.Describe {
        @JsonProperty
        private String media;
        @JsonProperty
        private Long size;

        private Describe() {
            super();
        }

        /**
         * Returns the Internet Media Type of the file.
         *
         * @return Internet Media Type
         *
         * @throws IllegalStateException if the media type is absent from the describe data
         */
        public String getMediaType() {
            if (this.media == null) {
                throw new IllegalStateException(
                        "media type is not accessible because it was not retrieved with the describe call");
            }
            return this.media;
        }

        /**
         * Returns the size of the file in bytes.
         *
         * @return size of file
         *
         * @throws IllegalStateException if the size is absent from the describe data
         */
        public Long getSize() {
            if (this.size == null) {
                throw new IllegalStateException(
                        "file size is not accessible because it was not retrieved with the describe call");
            }
            return this.size;
        }
    }

    /**
     * Input stream over a byte range of this file. Data is fetched lazily in chunks through
     * ranged HTTP GET requests against the URL returned by the file's "download" API route.
     *
     * <p>
     * The chunk size starts at {@code minDownloadChunkSize} and is multiplied by {@code ramp}
     * after every {@code numRequestsBetweenRamp} requests, up to {@code maxDownloadChunkSize}.
     * </p>
     */
    private class FileApiInputStream extends InputStream {
        private FileDownloadResponse apiResponse;

        // Size of the next chunk to request
        private long chunkSize = minDownloadChunkSize;
        // Offset within the file of the first byte that has not yet been requested
        private long nextByteFromApi;
        // Exclusive end offset of the range served by this stream
        private final long readEnd;
        // Number of the next request at the current chunk size (1-based)
        private int request = 1;
        // Bytes of the most recently downloaded chunk that have not been handed out yet
        private ByteArrayInputStream unreadBytes;

        /**
         * @param readStart offset of the first byte to read (inclusive)
         * @param readEnd offset at which to stop reading (exclusive), or -1 for the end of the
         *        file, in which case the file is described to obtain its size
         *
         * @throws IllegalArgumentException if the resolved end is smaller than {@code readStart}
         */
        private FileApiInputStream(long readStart, long readEnd) {
            // API call returns URL and headers for HTTP GET requests
            JsonNode output = apiCallOnObject("download", MAPPER.valueToTree(new FileDownloadRequest(true)),
                    RetryStrategy.SAFE_TO_RETRY);
            try {
                apiResponse = MAPPER.treeToValue(output, FileDownloadResponse.class);
            } catch (JsonProcessingException e) {
                throw new RuntimeException(e);
            }

            long effectiveEnd = readEnd;
            if (effectiveEnd == -1) {
                effectiveEnd = describe().getSize();
            }
            Preconditions.checkArgument(effectiveEnd >= readStart,
                    "The start byte cannot be larger than the end byte");
            this.readEnd = effectiveEnd;
            this.nextByteFromApi = readStart;
        }

        /**
         * Reads a single byte.
         *
         * @return the byte as an unsigned value in the range 0-255, or -1 at end of stream
         */
        @Override
        public int read() throws IOException {
            byte[] single = new byte[1];
            if (read(single, 0, 1) == -1) {
                return -1;
            }
            // Mask to an unsigned value: returning the raw (signed) byte would make 0xFF
            // indistinguishable from the end-of-stream marker and other high bytes negative.
            return single[0] & 0xff;
        }

        @Override
        public int read(byte[] b) throws IOException {
            return read(b, 0, b.length);
        }

        /**
         * Reads up to {@code numBytes} bytes, downloading the next chunk first if the buffered
         * chunk has been fully consumed.
         *
         * @return number of bytes read (at least 1 when {@code numBytes > 0}), or -1 when the
         *         requested range has been exhausted
         *
         * @throws IOException if a chunk cannot be downloaded or a download returns no data
         */
        @Override
        public int read(byte[] b, int off, int numBytes) throws IOException {
            if (off < 0 || numBytes < 0 || numBytes > b.length - off) {
                throw new IndexOutOfBoundsException();
            }

            if (numBytes == 0) {
                return 0;
            }

            if (unreadBytes == null || unreadBytes.available() == 0) {
                if (nextByteFromApi >= readEnd) {
                    return -1;
                }
                fillBuffer();
            }

            return unreadBytes.read(b, off, Math.min(numBytes, unreadBytes.available()));
        }

        /**
         * Downloads the next chunk into {@code unreadBytes} and advances the request state.
         * Must only be called while {@code nextByteFromApi < readEnd}.
         */
        private void fillBuffer() throws IOException {
            // Ramp up download request size
            if (chunkSize < maxDownloadChunkSize && request > numRequestsBetweenRamp) {
                request = 1;
                chunkSize = Math.min(chunkSize * ramp, maxDownloadChunkSize);
            }

            long startRange = nextByteFromApi;
            // The HTTP range is inclusive on both ends; clamp it so that no bytes at or beyond
            // the exclusive end of the requested range are downloaded or returned.
            long endRange = Math.min(startRange + chunkSize, readEnd) - 1;

            byte[] chunk = partDownloadRequest(apiResponse.url, startRange, endRange);
            if (chunk.length == 0) {
                throw new IOException(
                        "Download request for bytes " + startRange + "-" + endRange + " returned no data");
            }

            unreadBytes = new ByteArrayInputStream(chunk);
            nextByteFromApi = endRange + 1;
            request++;
        }
    }

    /**
     * Output stream that uploads the data written to it as consecutively numbered parts of
     * this file. Data is buffered in memory and sent in parts of {@code uploadChunkSize}
     * bytes; whatever remains in the buffer is sent as the final part when the stream is
     * closed.
     */
    private class FileApiOutputStream extends OutputStream {
        // Set once close() has run so that repeated close() calls are no-ops
        private boolean closed = false;
        // Index of the next part to upload; the first part has index 1
        private int index = 1;
        // Data written to the stream that has not been uploaded yet
        private ByteArrayOutputStream unwrittenBytes = new ByteArrayOutputStream();

        /**
         * Uploads the remaining buffered bytes as the final part. Calling this method again
         * has no effect, so the final part is never overwritten by a second, empty upload.
         *
         * @throws IOException if the final part cannot be uploaded
         */
        @Override
        public void close() throws IOException {
            if (closed) {
                return;
            }
            // Mark as closed before uploading, as recommended by java.io.Closeable
            closed = true;

            // Flush out remaining bytes to upload
            partUploadRequest(unwrittenBytes.toByteArray(), index);
            unwrittenBytes = new ByteArrayOutputStream();
        }

        @Override
        public void write(byte[] b) throws IOException {
            write(b, 0, b.length);
        }

        /**
         * Buffers the given bytes and uploads every complete part of {@code uploadChunkSize}
         * bytes that has accumulated; the incomplete remainder stays buffered.
         */
        @Override
        public void write(byte[] b, int off, int numBytes) throws IOException {
            unwrittenBytes.write(b, off, numBytes);
            if (unwrittenBytes.size() >= uploadChunkSize) {
                byte[] bytesToWrite = unwrittenBytes.toByteArray();
                int chunkStart = 0;
                while (bytesToWrite.length - chunkStart >= uploadChunkSize) {
                    partUploadRequest(Arrays.copyOfRange(bytesToWrite, chunkStart, chunkStart + uploadChunkSize),
                            index);
                    chunkStart += uploadChunkSize;
                    index++;
                }
                // Carry the incomplete tail over into a fresh buffer
                unwrittenBytes = new ByteArrayOutputStream();
                unwrittenBytes.write(bytesToWrite, chunkStart, bytesToWrite.length - chunkStart);
            }
        }

        @Override
        public void write(int b) throws IOException {
            // Only the low-order eight bits are written, per the OutputStream contract
            write(new byte[] { (byte) b }, 0, 1);
        }
    }

    /**
     * Request to /file-xxxx/download. Serialized to JSON as the input of the "download" API
     * call made when a download stream is constructed.
     */
    @JsonInclude(Include.NON_NULL)
    private static class FileDownloadRequest {
        // Sent under the JSON key "preauthenticated"
        @JsonProperty("preauthenticated")
        private boolean preauth;

        private FileDownloadRequest(boolean preauth) {
            this.preauth = preauth;
        }
    }

    /**
     * Deserialized output from the /file-xxxx/download route. Properties other than the ones
     * declared here are ignored during deserialization.
     */
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static class FileDownloadResponse {
        // Deserialized from the response; not read by the download code in this class
        @JsonProperty
        private Map<String, String> headers;
        // URL to which the ranged HTTP GET requests for the file contents are made
        @JsonProperty
        private String url;
    }

    /**
     * Request to create a new file. Adds the media type configured on the {@link Builder} to
     * the fields contributed by {@code DataObjectNewRequest}; null values are omitted from the
     * serialized JSON.
     */
    @JsonInclude(Include.NON_NULL)
    private static class FileNewRequest extends DataObjectNewRequest {
        // Internet Media Type taken from the builder; null if none was set
        @JsonProperty
        private final String media;

        public FileNewRequest(Builder builder) {
            super(builder);
            this.media = builder.media;
        }
    }

    /**
     * Request to /file-xxxx/upload. Describes a single part that is about to be uploaded.
     */
    @JsonInclude(Include.NON_NULL)
    private static class FileUploadRequest {
        // Index of the part being uploaded; the upload stream numbers parts starting at 1
        @JsonProperty
        private int index = 1;
        // MD5 digest of the part's data as a hex string
        @JsonProperty
        private String md5;
        // Length of the part's data in bytes
        @JsonProperty
        private int size;

        private FileUploadRequest(int size, String md5, int index) {
            this.size = size;
            this.md5 = md5;
            this.index = index;
        }
    }

    /**
     * Response from /file-xxxx/upload. Properties other than the ones declared here are
     * ignored during deserialization.
     */
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static class FileUploadResponse {
        // Headers to send with the HTTP PUT request that uploads the part
        @JsonProperty
        private Map<String, String> headers;
        // URL to which the part's data is sent with an HTTP PUT request
        @JsonProperty
        private String url;
    }

    // User agent string set on the HTTP clients used for part downloads and uploads
    private static final String USER_AGENT = DXUserAgent.getUserAgent();

    /**
     * Jackson factory method: builds a {@code DXFile} from the JSON representation of a
     * DNAnexus link.
     *
     * @param value JSON object map holding the link
     *
     * @return data object referring to the linked file
     */
    @JsonCreator
    private static DXFile create(Map<String, Object> value) {
        checkDXLinkFormat(value);
        // TODO: how to set the environment?
        final String linkedFileId = (String) value.get("$dnanexus_link");
        return DXFile.getInstance(linkedFileId);
    }

    /**
     * Executes an HTTP request, retrying when the server does not respond.
     *
     * <p>
     * Only {@link NoHttpResponseException} triggers a retry; any other failure propagates
     * immediately. The request is attempted at most 5 times in total, sleeping 1, 2, 4 and 8
     * seconds between consecutive attempts.
     * </p>
     *
     * @param httpclient client used to execute the request
     * @param request HttpGet, HttpPost, or HttpPut request
     *
     * @return response to the HTTP Request
     *
     * @throws NoHttpResponseException if the server did not respond to any of the attempts
     * @throws IOException if the request fails for any other reason
     */
    private static HttpResponse executeRequestWithRetry(HttpClient httpclient, HttpRequestBase request)
            throws IOException {
        final int maxAttempts = 5;
        int timeoutSeconds = 1;
        for (int attempt = 1;; attempt++) {
            try {
                return httpclient.execute(request);
            } catch (NoHttpResponseException e) {
                if (attempt >= maxAttempts) {
                    throw e;
                }
                // This helper serves both downloads and uploads, so the message stays neutral
                System.out.println(
                        "Error executing HTTP request. Waiting " + timeoutSeconds
                                + " second(s) before retrying...");
                sleep(timeoutSeconds);
                timeoutSeconds *= 2;
            }
        }
    }

    /**
     * Returns a {@code DXFile} handle for an existing file, without binding it to a specific
     * environment.
     *
     * @param fileId ID of the file
     *
     * @throws NullPointerException If {@code fileId} is null
     */
    public static DXFile getInstance(String fileId) {
        final DXEnvironment unspecifiedEnv = null;
        return new DXFile(fileId, unspecifiedEnv);
    }

    /**
     * Returns a {@code DXFile} handle for an existing file in a particular project or
     * container, without binding it to a specific environment or cached describe data.
     *
     * @param fileId ID of the file
     * @param project project or container holding the file
     *
     * @throws NullPointerException If {@code fileId} or {@code project} is null
     */
    public static DXFile getInstance(String fileId, DXContainer project) {
        final DXEnvironment unspecifiedEnv = null;
        final JsonNode noCachedDescribe = null;
        return new DXFile(fileId, project, unspecifiedEnv, noCachedDescribe);
    }

    /**
     * Returns a {@code DXFile} handle for an existing file in a particular project, bound to
     * the given environment and carrying the given cached describe output.
     *
     * <p>
     * Intended only for the bindings to the "find" routes, which receive describe hashes
     * together with the find output.
     * </p>
     *
     * @throws NullPointerException If any argument is null
     */
    static DXFile getInstanceWithCachedDescribe(String fileId, DXContainer project, DXEnvironment env,
            JsonNode describe) {
        // Validate in the same order as the arguments are passed on: env first, then describe
        DXEnvironment checkedEnv = Preconditions.checkNotNull(env, "env may not be null");
        JsonNode checkedDescribe = Preconditions.checkNotNull(describe, "describe may not be null");
        return new DXFile(fileId, project, checkedEnv, checkedDescribe);
    }

    /**
     * Returns a {@code DXFile} handle for an existing file in a particular project, bound to
     * the given environment.
     *
     * @param fileId ID of the file
     * @param project project or container holding the file
     * @param env environment to use to make API calls
     *
     * @throws NullPointerException If {@code fileId}, {@code project} or {@code env} is null
     */
    public static DXFile getInstanceWithEnvironment(String fileId, DXContainer project, DXEnvironment env) {
        DXEnvironment checkedEnv = Preconditions.checkNotNull(env, "env may not be null");
        final JsonNode noCachedDescribe = null;
        return new DXFile(fileId, project, checkedEnv, noCachedDescribe);
    }

    /**
     * Returns a {@code DXFile} handle for an existing file, bound to the given environment.
     *
     * @param fileId ID of the file
     * @param env environment to use to make API calls
     *
     * @throws NullPointerException If {@code fileId} or {@code env} is null
     */
    public static DXFile getInstanceWithEnvironment(String fileId, DXEnvironment env) {
        DXEnvironment checkedEnv = Preconditions.checkNotNull(env, "env may not be null");
        return new DXFile(fileId, checkedEnv);
    }

    /**
     * Starts the creation of a new {@code DXFile}.
     *
     * @return a freshly initialized builder
     */
    public static Builder newFile() {
        Builder freshBuilder = new Builder();
        return freshBuilder;
    }

    /**
     * Starts the creation of a new {@code DXFile} in the specified environment.
     *
     * @param env environment to use to make API calls
     *
     * @return a freshly initialized builder bound to {@code env}
     */
    public static Builder newFileWithEnvironment(DXEnvironment env) {
        Builder freshBuilder = new Builder(env);
        return freshBuilder;
    }

    /**
     * HTTP GET request to download part of the file.
     *
     * <p>
     * The HTTP client and the response stream are closed before this method returns. A
     * response with a non-2xx status is reported as an {@link IOException} instead of having
     * its body returned as if it were file data.
     * </p>
     *
     * @param url URL to which an HTTP GET request is made to download the file
     * @param start offset of the first byte of the part to be downloaded. This index is
     *        inclusive in the range.
     * @param end offset of the last byte of the part to be downloaded. This index is inclusive
     *        in the range.
     *
     * @return byte array containing the part of the file contents that is downloaded (empty if
     *         the response carries no entity)
     *
     * @throws IllegalStateException if the requested range is larger than 2GB
     * @throws ClientProtocolException HTTP request to the download URL cannot be executed
     * @throws IOException unable to get file contents from HTTP response, or the server
     *         answered with a non-2xx status
     */
    private static byte[] partDownloadRequest(String url, long start, long end)
            throws ClientProtocolException, IOException {
        Preconditions.checkState(end - start <= (long) 2 * 1024 * 1024 * 1024,
                "Download chunk size cannot be larger than 2GB");

        // HTTP GET request with an inclusive byte range header
        HttpGet request = new HttpGet(url);
        request.addHeader("Range", "bytes=" + start + "-" + end);

        // A new client is created per request; close it so its connections are released
        try (CloseableHttpClient httpclient = HttpClientBuilder.create().setUserAgent(USER_AGENT).build()) {
            HttpResponse response = executeRequestWithRetry(httpclient, request);

            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IOException("Download request for bytes " + start + "-" + end
                        + " failed with HTTP status " + status);
            }

            if (response.getEntity() == null) {
                return new byte[0];
            }
            try (InputStream content = response.getEntity().getContent()) {
                return IOUtils.toByteArray(content);
            }
        }
    }

    /**
     * Sleeps for the specified amount of time. If the thread is interrupted while sleeping,
     * its interrupt status is restored and a {@link RuntimeException} wrapping the
     * {@link InterruptedException} is thrown.
     *
     * @param seconds number of seconds to sleep for
     */
    private static void sleep(int seconds) {
        try {
            // Multiply as long so that large values cannot overflow int
            Thread.sleep(seconds * 1000L);
        } catch (InterruptedException e) {
            // Restore the flag so that code further up the stack can still observe the interrupt
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    // Variables for download
    // Upper bound (16 MiB) that the download chunk size is ramped up to
    private final int maxDownloadChunkSize = 16 * 1024 * 1024;
    // Chunk size (64 KiB) that a new download stream starts with
    private final int minDownloadChunkSize = 64 * 1024;
    // Number of chunk requests made at one chunk size before the size is increased
    private final int numRequestsBetweenRamp = 4;

    // Ramp up factor for downloading by parts: the chunk size is multiplied by this value
    private final int ramp = 2;

    // Variables for upload
    // Size in bytes (16 MiB) of each part sent by the upload stream before the final part;
    // non-final and package-private so that tests can override it
    @VisibleForTesting
    int uploadChunkSize = 16 * 1024 * 1024;

    /**
     * Creates a handle for the file with the given ID, passing the class name "file" together
     * with the project, environment and cached describe data to the superclass constructor.
     * Callers in this class pass null for {@code env} and/or {@code describe} when those are
     * not specified.
     */
    private DXFile(String fileId, DXContainer project, DXEnvironment env, JsonNode describe) {
        super(fileId, "file", project, env, describe);
    }

    /**
     * Creates a handle for the file with the given ID without a project, passing the class
     * name "file", the environment and a null final argument to the superclass constructor.
     * {@link #getInstance(String)} passes null for {@code env}.
     */
    private DXFile(String fileId, DXEnvironment env) {
        super(fileId, "file", env, null);
    }

    /**
     * Delegates to the superclass {@code close()} and returns this object, narrowing the
     * return type to {@code DXFile} so that calls can be chained.
     *
     * @return this {@code DXFile}
     */
    @Override
    public DXFile close() {
        super.close();
        return this;
    }

    /**
     * Delegates to the superclass {@code closeAndWait()} and returns this object, narrowing
     * the return type to {@code DXFile} so that calls can be chained.
     *
     * @return this {@code DXFile}
     */
    @Override
    public DXFile closeAndWait() {
        super.closeAndWait();
        return this;
    }

    /**
     * Calls the "describe" route on this file and converts the output into a
     * {@link Describe} object.
     *
     * @return metadata of the file
     */
    @Override
    public Describe describe() {
        JsonNode describeOutput = apiCallOnObject("describe", RetryStrategy.SAFE_TO_RETRY);
        return DXJSON.safeTreeToValue(describeOutput, Describe.class);
    }

    /**
     * Calls the "describe" route on this file with the given options as input and converts
     * the output into a {@link Describe} object.
     *
     * @param options options serialized to JSON and sent as the input of the describe call
     *
     * @return metadata of the file
     */
    @Override
    public Describe describe(DescribeOptions options) {
        JsonNode describeInput = MAPPER.valueToTree(options);
        JsonNode describeOutput = apiCallOnObject("describe", describeInput, RetryStrategy.SAFE_TO_RETRY);
        return DXJSON.safeTreeToValue(describeOutput, Describe.class);
    }

    /**
     * Downloads the complete contents of the file into memory.
     *
     * @return byte array containing file contents
     * @throws IOException if an error occurs while downloading the data
     */
    public byte[] downloadBytes() throws IOException {
        // An end of -1 stands for the end of the file
        final long endOfFile = -1;
        return downloadBytes(0, endOfFile);
    }

    /**
     * Downloads a byte range of the file into memory. The requested range must not exceed
     * 2 GB.
     *
     * @param start first byte of the range within the file to be downloaded. The start byte is
     *        inclusive in the range, and 0 is indexed as the first byte in the file.
     * @param end last byte of the range within the file to be downloaded. The end byte is exclusive
     *        (not included in the range). An input of -1 specifies the end of the file.
     *
     * @return byte array containing file contents within range specified
     * @throws IllegalStateException if {@code end - start} exceeds 2 GB
     * @throws IOException if an error occurs while downloading the data
     */
    public byte[] downloadBytes(long start, long end) throws IOException {
        final long maxRangeBytes = (long) 2 * 1024 * 1024 * 1024;
        if (end - start > maxRangeBytes) {
            throw new IllegalStateException(
                    "Range of file larger than 2GB cannot be downloaded with downloadBytes");
        }
        return IOUtils.toByteArray(getDownloadStream(start, end));
    }

    /**
     * Downloads the complete file and writes its contents to the given stream. The file is
     * described first to obtain its size.
     *
     * @param os output stream downloaded file contents are written into
     * @throws IOException if an error occurs while downloading or writing the data
     */
    public void downloadToOutputStream(OutputStream os) throws IOException {
        long fileSize = describe().getSize();
        downloadToOutputStream(os, 0, fileSize);
    }

    /**
     * Downloads a byte range of the file and writes it to the given stream.
     *
     * @param os output stream downloaded file contents are written into
     * @param start first byte of the range within the file to be downloaded. The start byte is
     *        inclusive in the range, and 0 is indexed as the first byte in the file.
     * @param end last byte of the range within the file to be downloaded. The end byte is exclusive
     *        (not included in the range). An input of -1 specifies the end of the file.
     *
     * @throws IOException if an error occurs while downloading or writing the data
     */
    public void downloadToOutputStream(OutputStream os, long start, long end) throws IOException {
        IOUtils.copyLarge(getDownloadStream(start, end), os);
    }

    /**
     * Returns the describe data cached on this object, converted into a {@link Describe}
     * object. {@code checkCachedDescribeAvailable()} is invoked first to verify that cached
     * data is present.
     *
     * @return cached metadata of the file
     */
    @Override
    public Describe getCachedDescribe() {
        this.checkCachedDescribeAvailable();
        return DXJSON.safeTreeToValue(this.cachedDescribe, Describe.class);
    }

    /**
     * Returns a stream over the complete contents of the file.
     *
     * @return stream containing file contents
     */
    public InputStream getDownloadStream() {
        // An end of -1 stands for the end of the file
        final long endOfFile = -1;
        return getDownloadStream(0, endOfFile);
    }

    /**
     * Returns a stream over a byte range of the file's contents.
     *
     * @param start first byte of the range within the file to be downloaded. The start byte is
     *        inclusive in the range, and 0 is indexed as the first byte in the file.
     * @param end last byte of the range within the file to be downloaded. The end byte is exclusive
     *        (not included in the range). An input of -1 specifies the end of the file.
     *
     * @return stream containing file contents within range specified
     */
    public InputStream getDownloadStream(long start, long end) {
        InputStream rangeStream = new FileApiInputStream(start, end);
        return rangeStream;
    }

    /**
     * Returns an OutputStream that uploads whatever is written to it to this file.
     * <p>
     * The file must be in the "open" state. This method assumes exclusive access to the file: the
     * file must have no parts uploaded before this call is made, and no other clients may upload
     * data to the same file concurrently.
     * </p>
     *
     * @return OutputStream to which file contents are written
     */
    public OutputStream getUploadStream() {
        OutputStream uploadSink = new FileApiOutputStream();
        return uploadSink;
    }

    /**
     * Uploads one part of the file: asks the API server for an upload URL and headers for the
     * part, then sends the data to that URL with an HTTP PUT request.
     *
     * <p>
     * The HTTP client is closed before this method returns, and a PUT response with a non-2xx
     * status is reported as an {@link IOException} rather than being silently ignored.
     * </p>
     *
     * @param dataChunk data part that is uploaded
     * @param index index of the part within the file
     *
     * @throws AssertionError if the content length announced by the API server differs from
     *         the length of {@code dataChunk}
     * @throws IOException if unable to execute HTTP request, or the server answered the PUT
     *         request with a non-2xx status
     */
    private void partUploadRequest(byte[] dataChunk, int index) throws IOException {
        // MD5 digest as 32 character hex string
        String dataMD5 = DigestUtils.md5Hex(dataChunk);

        // API call returns URL and headers
        JsonNode output = apiCallOnObject("upload",
                MAPPER.valueToTree(new FileUploadRequest(dataChunk.length, dataMD5, index)),
                RetryStrategy.SAFE_TO_RETRY);

        FileUploadResponse apiResponse;
        try {
            apiResponse = MAPPER.treeToValue(output, FileUploadResponse.class);
        } catch (JsonProcessingException e) {
            throw new RuntimeException(e);
        }

        // Check that the content-length received by the apiserver is the same
        // as the length of the data
        if (apiResponse.headers.containsKey("content-length")) {
            int apiserverContentLength = Integer.parseInt(apiResponse.headers.get("content-length"));
            if (apiserverContentLength != dataChunk.length) {
                throw new AssertionError(
                        "Content-length received by the apiserver did not match that of the input data");
            }
        }

        // HTTP PUT request to upload URL and headers
        HttpPut request = new HttpPut(apiResponse.url);
        request.setEntity(new ByteArrayEntity(dataChunk));

        // Set headers
        for (Map.Entry<String, String> header : apiResponse.headers.entrySet()) {
            String key = header.getKey();

            // The request implicitly supplies the content length in the headers
            // when executed
            if (key.equals("content-length")) {
                continue;
            }

            request.setHeader(key, header.getValue());
        }

        // A new client is created per part; close it so its connections are released
        try (CloseableHttpClient httpclient = HttpClientBuilder.create().setUserAgent(USER_AGENT).build()) {
            HttpResponse response = executeRequestWithRetry(httpclient, request);

            // Surface rejected uploads instead of treating every response as success
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IOException("Upload of part " + index + " failed with HTTP status " + status);
            }
        }
    }

    /**
     * Uploads the contents of the given byte array to the file.
     *
     * <p>
     * The file must be in the "open" state. This method assumes exclusive access to the file: the
     * file must have no parts uploaded before this call is made, and no other clients may upload
     * data to the same file concurrently.
     * </p>
     *
     * @param data data in bytes to be uploaded
     *
     * @throws NullPointerException if {@code data} is null
     * @throws IOException if an error occurs while uploading the data
     */
    public void upload(byte[] data) throws IOException {
        if (data == null) {
            throw new NullPointerException("data may not be null");
        }
        // Closing the stream uploads whatever is still buffered as the final part
        try (OutputStream partSink = this.getUploadStream()) {
            partSink.write(data);
        }
    }

    /**
     * Uploads everything that can be read from the given stream to the file.
     *
     * <p>
     * The file must be in the "open" state. This method assumes exclusive access to the file: the
     * file must have no parts uploaded before this call is made, and no other clients may upload
     * data to the same file concurrently.
     * </p>
     *
     * @param data stream containing data to be uploaded
     *
     * @throws NullPointerException if {@code data} is null
     * @throws IOException if an error occurs while uploading the data
     */
    public void upload(InputStream data) throws IOException {
        if (data == null) {
            throw new NullPointerException("data may not be null");
        }
        // Closing the stream uploads whatever is still buffered as the final part
        try (OutputStream partSink = this.getUploadStream()) {
            IOUtils.copyLarge(data, partSink);
        }
    }
}