fr.gael.dhus.util.http.DownloadableProduct.java Source code

Java tutorial

Introduction

Here is the source code for fr.gael.dhus.util.http.DownloadableProduct.java

Source

/*
 * Data Hub Service (DHuS) - For Space data distribution.
 * Copyright (C) 2017,2018 GAEL Systems
 *
 * This file is part of DHuS software sources.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package fr.gael.dhus.util.http;

import fr.gael.dhus.database.object.Product;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.channels.Channels;
import java.nio.channels.InterruptibleChannel;
import java.nio.channels.Pipe;
import java.nio.channels.WritableByteChannel;
import java.security.DigestException;
import java.security.MessageDigest;
import java.util.Formatter;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import org.dhus.AbstractProduct;
import org.dhus.ProductConstants;

import org.springframework.security.crypto.codec.Hex;

/**
 * A Product that supports the InputStream implementation.
 * Every time getImpl(InputStream.class) is invoked, a new download starts.
 * Download happens in a thread, there is no thread pool.
 * Uses the {@link InterruptibleHttpClient} and a {@link Pipe}.
 * Both the Sink and Source channels of a pipe implement
 * {@link java.nio.channels.InterruptibleChannel}, making this class suitable for syncing tasks.
 * <p>
 * Only the most recently started download thread is tracked, therefore {@link #close()} only
 * interrupts the download started by the last call to {@link #getImpl(Class)}.
 */
public class DownloadableProduct extends AbstractProduct implements Closeable {
    /** Log. */
    private static final Logger LOGGER = LogManager.getLogger();

    /** Pattern for the filename property in the Content-Disposition HTTP Header field. */
    private static final Pattern FILENAME_PATTERN =
            Pattern.compile("filename=\"(.+?)\"", Pattern.CASE_INSENSITIVE);

    /** One HTTP client can be used by many concurrent threads. */
    private final InterruptibleHttpClient httpClient;

    /** How many time an interrupted download will be resumed. */
    public final int downloadAttempts;

    /** URL of the data to download. */
    public final String url;

    /** Expected MD5 of data to download, {@code null} to skip the verification. */
    public final String md5;

    /** Content-Length as reported in the HTTP headers (HTTP payload). */
    public final long contentLength;

    /** Content-Type as reported in the HTTP headers, {@code null} if the remote did not report one. */
    public final String contentType;

    /** ETag HTTP header used for resuming interrupted downloads, {@code null} if not reported. */
    public final String ETag;

    /** `true` if remote supports HTTP ranges. */
    public final boolean canResume;

    /** filename as reported in the Content-Disposition, or the default name given at construction. */
    public final String filename;

    /** Downloading thread (the one started by the latest call to getImpl). */
    private volatile Thread downloadThread = null;

    /**
     * Prepares the download of the data at the given URL (typically the URL returned by
     * {@link Product#getOrigin()}) using the given HTTP client.
     * <p>
     * A HEAD request is issued right away to check that the target is available and to read its
     * size, content type, ETag, range support and filename.
     *
     * @param http_client A well configured HTTP client to download the product (not null)
     * @param download_attempts How many time an interrupted download will be resumed
     * @param to_download URL of the data to download (not null)
     * @param md5 hash of data to download, for verification purposes (may be null, in which case
     *            the downloaded data is not verified)
     * @param default_name name to use when the remote does not report a filename (not null)
     * @throws IOException if the HTTP client could not HEAD the target, if the response status is
     *                     not 200, or if the Content-Length header is missing or malformed
     * @throws InterruptedException if current thread is interrupted
     */
    public DownloadableProduct(InterruptibleHttpClient http_client, int download_attempts, String to_download,
            String md5, String default_name) throws IOException, InterruptedException {
        Objects.requireNonNull(http_client);
        Objects.requireNonNull(to_download);
        Objects.requireNonNull(default_name);
        this.url = to_download;
        this.httpClient = http_client;
        this.md5 = md5;
        this.downloadAttempts = download_attempts;

        // HEADs the target to check availability and get its properties
        HttpResponse headrsp = http_client.interruptibleHead(to_download);
        if (headrsp.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
            raiseFailure(headrsp.getStatusLine(), headrsp.getFirstHeader("cause-message"));
        }

        // Gets the size of the payload (mandatory) and its type (optional)
        this.contentLength = parseContentLength(headerValue(headrsp, "Content-Length"), to_download);
        setProperty(ProductConstants.DATA_SIZE, this.contentLength);
        this.contentType = headerValue(headrsp, "Content-Type");

        // Resuming requires an ETag (sent back with range requests) and range support
        Header etag = headrsp.getFirstHeader("ETag");
        if (etag != null) {
            this.canResume = acceptsRanges(headrsp);
            this.ETag = etag.getValue();
        } else {
            this.ETag = null;
            this.canResume = false;
        }

        // Gets the filename from the HTTP header field `Content-Disposition'
        this.filename = extractFilename(headerValue(headrsp, "Content-Disposition"), default_name);
    }

    /** Returns the value of the first header with the given name, or null if there is no such header. */
    private static String headerValue(HttpResponse response, String name) {
        Header header = response.getFirstHeader(name);
        return header == null ? null : header.getValue();
    }

    /** Parses the Content-Length value, failing with an IOException if it is missing or not a number. */
    private static long parseContentLength(String value, String url) throws IOException {
        if (value == null) {
            throw new IOException("Cannot download " + url + ", no Content-Length in the HEAD response");
        }
        try {
            return Long.parseLong(value.trim());
        } catch (NumberFormatException ex) {
            throw new IOException("Cannot download " + url + ", invalid Content-Length '" + value + "'", ex);
        }
    }

    /** Tells whether the response advertises range support (`Accept-Ranges: none' means it does not). */
    private static boolean acceptsRanges(HttpResponse response) {
        Header header = response.getFirstHeader("Accept-Ranges");
        if (header == null) {
            return false;
        }
        String value = header.getValue();
        return value == null || !"none".equalsIgnoreCase(value.trim());
    }

    /** Extracts the quoted filename of a Content-Disposition value, falls back to the given default. */
    private static String extractFilename(String content_disposition, String default_name) {
        if (content_disposition != null && !content_disposition.isEmpty()) {
            Matcher m = FILENAME_PATTERN.matcher(content_disposition);
            if (m.find()) {
                return m.group(1);
            }
        }
        return default_name;
    }

    @Override
    protected Class<?>[] implsTypes() {
        return new Class<?>[] { InputStream.class };
    }

    @Override
    public String getName() {
        return this.filename;
    }

    @Override
    public boolean hasImpl(Class<?> cl) {
        return InputStream.class.isAssignableFrom(cl);
    }

    /**
     * Starts a new download in a new thread and returns the reading end of the pipe it writes to.
     *
     * @param cl the requested implementation type, only InputStream is supported
     * @return an InputStream on the downloaded data, or null if `cl' is not supported or if the
     *         pipe could not be created
     */
    @Override
    public <T> T getImpl(Class<? extends T> cl) {
        // NOTE(review): a strict subclass of InputStream also passes this check, but the cast
        // below would then fail because the stream is whatever Channels.newInputStream returns.
        if (InputStream.class.isAssignableFrom(cl)) {
            try {
                Pipe pipe = Pipe.open();
                DownloadTask dltask = new DownloadTask(pipe);
                downloadThread = new Thread(dltask, "Product Download");
                downloadThread.start();

                InputStream is = Channels.newInputStream(pipe.source());
                return cl.cast(is);
            } catch (IOException ex) {
                LOGGER.error("could not create pipe", ex);
            }
        }
        return null;
    }

    /**
     * Interrupts the downloading thread, if any.
     */
    @Override
    public void close() throws IOException {
        // Reads the volatile field once so the null check and the call see the same thread
        Thread worker = downloadThread;
        if (worker != null) {
            worker.interrupt();
        }
    }

    /** raise an IOException with the given StatusLine and cause Header (cause may be null). */
    private void raiseFailure(StatusLine stl, Header cause) throws IOException {
        try (Formatter ff = new Formatter()) {
            ff.format("Cannot download %s, Reason='%s' (HTTP%d)", this.url, stl.getReasonPhrase(),
                    stl.getStatusCode());
            if (cause != null) {
                String cause_msg = cause.getValue();
                if (cause_msg != null && !cause_msg.isEmpty()) {
                    ff.format(" Cause='%s'", cause_msg);
                }
            }
            throw new IOException(ff.toString());
        }
    }

    /** Downloads the data at this.url, streams it to the InputStream implementation using a Pipe. */
    private class DownloadTask<IWC extends InterruptibleChannel & WritableByteChannel> implements Runnable {
        private final Pipe pipe;
        private int attempts;

        /** Create a new DownloadTask writing to the sink of the given pipe. */
        public DownloadTask(Pipe pipe) {
            this.pipe = pipe;
            this.attempts = downloadAttempts;
        }

        /**
         * In-thread code.
         * Writes the downloaded data to the sink of the pipe, then closes the sink.
         * On failure or interruption both ends of the pipe are closed, so the reader gets an
         * IOException instead of a silently truncated stream.
         */
        @Override
        public void run() {
            try {
                IWC output = (IWC) pipe.sink();

                // Computes the data's md5 sum on the fly
                MessageDigest md = null;
                if (md5 != null) {
                    md = MessageDigest.getInstance("MD5");
                    output = (IWC) new DigestIWC(md, pipe.sink());
                }

                // Download
                long start = System.currentTimeMillis();
                if (canResume) {
                    // Range download
                    // Counts written bytes (FIXME: is it done by the HTTP client?)
                    CountingIWC decorator = new CountingIWC(output);
                    for (; this.attempts != 0 && decorator.currentCount() < contentLength; this.attempts--) {
                        long downloaded = decorator.currentCount();
                        LOGGER.debug("Download of {} : Range [{}, {}]", url, downloaded, contentLength);
                        try {
                            httpClient.interruptibleGetRange(url, decorator, ETag, downloaded, contentLength);
                        } catch (IOException ex) {
                            // Not fatal: the next iteration resumes from the bytes already written
                            LOGGER.debug("Download of {} interrupted ({}, {})", url, ex.getClass().getName(),
                                    ex.getMessage());
                        }
                    }

                    // True when all the attempts were used before the whole payload was received
                    if (decorator.currentCount() != contentLength) {
                        throw new IllegalStateException("Content-Length does not match downloaded bytes count");
                    }
                } else {
                    // Classic download
                    HttpResponse response = httpClient.interruptibleGet(url, output);

                    // If the response's status code is not 200, something wrong happened
                    if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                        raiseFailure(response.getStatusLine(), response.getFirstHeader("cause-message"));
                    }
                }

                // Check MD5SUM (md is only set when an expected md5 was given)
                if (md != null) {
                    String data_md5 = String.valueOf(Hex.encode(md.digest()));
                    if (!md5.equalsIgnoreCase(data_md5)) {
                        throw new DigestException(data_md5 + " != " + md5);
                    }
                }

                LOGGER.info("Product '{}' ({} bytes compressed) successfully downloaded from {} in {} ms", filename,
                        contentLength, url, System.currentTimeMillis() - start);
                pipe.sink().close();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    LOGGER.debug("Thread downloading {} from {} interrupted", filename, url);
                } else {
                    LOGGER.error("Download of {} from {} failed", filename, url, e);
                }
                try {
                    pipe.source().close(); // Will generate an IOException on the reader side
                } catch (IOException ignored) {
                    // Best effort: the download already failed, nothing more can be done here
                }
                try {
                    pipe.sink().close();
                } catch (IOException ignored) {
                    // Best effort: the download already failed, nothing more can be done here
                }
            }
        }
    }

}