dk.netarkivet.common.distribute.HTTPRemoteFile.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.common.distribute.HTTPRemoteFile.java

Source

/* $Id$
 * $Revision$
 * $Author$
 * $Date$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package dk.netarkivet.common.distribute;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.security.DigestInputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.SystemUtils;

/**
 * A remote file implemented with point-to-point HTTP communication.
 * Optimised to communicate locally, if file is on the same host.
 * Optimised to transfer 0 byte files inline.
 */
public class HTTPRemoteFile extends AbstractRemoteFile {
    /** The default place in classpath where the settings file can be found. */
    private static String DEFAULT_SETTINGS_CLASSPATH = "dk/netarkivet/common/distribute/HTTPRemoteFileSettings.xml";

    /*
     * The static initialiser is called when the class is loaded.
     * It will add default values for all settings defined in this class, by
     * loading them from a settings.xml file in classpath.
     */
    static {
        Settings.addDefaultClasspathSettings(DEFAULT_SETTINGS_CLASSPATH);
    }

    /** The name of the host this file originated on. */
    protected final String hostname;
    /** The url that exposes this remote file. */
    protected final URL url;
    /** If useChecksums is true, contains the file checksum. */
    protected final String checksum;
    /** The logger for this class. */
    private static final Log log = LogFactory.getLog(AbstractRemoteFile.class);

    // NOTE: The constants defining setting names below are left non-final on
    // purpose! Otherwise, the static initialiser that loads default values
    // will not run.

    /** 
     * <b>settings.common.remoteFile.port</b>: <br>
     * The setting for the HTTP remotefile port number used. */
    public static String HTTPREMOTEFILE_PORT_NUMBER = "settings.common.remoteFile.port";

    /**
     * Initialises a remote file implemented by point-to-point HTTP
     * communication.
     * @param file The file to make a remote file for
     * @param useChecksums Whether communications are checksummed. If true,
     * getChecksum will also return the checksum.
     * @param fileDeletable if true, the file given to this method is deletable,
     * once it is transferred.
     * @param multipleDownloads if true, the file may be transferred more than
     * once. Otherwise, all file handles are attempted to be made invalid after
     * the first transfer, although no guarantees are made.
     *
     * @throws ArgumentNotValid if file is null, or not a readable file.
     * @throws IOFailure if checksums are requested, but i/o errors occur while
     * checksumming.
     */
    protected HTTPRemoteFile(File file, boolean useChecksums, boolean fileDeletable, boolean multipleDownloads) {
        super(file, useChecksums, fileDeletable, multipleDownloads);
        this.hostname = SystemUtils.getLocalHostName();
        if (filesize > 0) {
            this.url = getRegistry().registerFile(this.file, this.fileDeletable);
        } else {
            this.url = null;
        }
        if (useChecksums) {
            this.checksum = ChecksumCalculator.calculateMd5(file);
        } else {
            this.checksum = null;
        }
    }

    /**
     * Initialises a remote file implemented by point-to-point HTTP
     * communication.
     * @param f The file to make a remote file for
     * @param useChecksums Whether communications are checksummed. If true,
     * getChecksum will also return the checksum.
     * @param fileDeletable if true, the file given to this method is deletable,
     * once it is transferred.
     * @param multipleDownloads if true, the file may be transferred more than
     * once. Otherwise, all file handles are attempted to be made invalid after
     * the first transfer, although no guarantees are made.
     * @throws ArgumentNotValid if file is null, or not a readable file.
     * @throws IOFailure if checksums are requested, but i/o errors occur while
     * checksumming.
     */
    public static RemoteFile getInstance(File f, Boolean useChecksums, Boolean fileDeletable,
            Boolean multipleDownloads) {
        return new HTTPRemoteFile(f, useChecksums, fileDeletable, multipleDownloads);
    }

    /** Get the webserver registry for this class of files. Meant to be
     * subclassed for specialised versions of this file.
     * @return The reigstry. 
     */
    protected HTTPRemoteFileRegistry getRegistry() {
        return HTTPRemoteFileRegistry.getInstance();
    }

    /** Copy this remote file to the given file.
     * If the file resides on the current machine, remote file transfer is done
     * locally. Otherwise, the remote file is transferred over http.
     * If the file is not set to be able to be transferred multiple times, it is
     * cleaned up after the transfer.
     * @param destFile The file to write the remote file to.
     * @throws ArgumentNotValid on null destFile, or parent to destfile is not
     * a writeable directory, or destfile exists and cannot be overwritten.
     * @throws IOFailure on I/O trouble writing remote file to destination.
     */
    public void copyTo(File destFile) {
        ArgumentNotValid.checkNotNull(destFile, "File destFile");
        destFile = destFile.getAbsoluteFile();
        if ((!destFile.isFile() || !destFile.canWrite())
                && (!destFile.getParentFile().isDirectory() || !destFile.getParentFile().canWrite())) {
            throw new ArgumentNotValid(
                    "Destfile '" + destFile + "' does not point to a writable file for remote file '" + file + "'");
        }
        if (isLocal() && fileDeletable && !multipleDownloads && !useChecksums) {
            if (file.renameTo(destFile)) {
                cleanup();
                return;
            }
            //if rename fails we fall back to normal usage.
        }
        super.copyTo(destFile);
    }

    /** Get an input stream representing the remote file.
     * If the file resides on the current machine, the input stream is to the
     * local file. Otherwise, the remote file is transferred over http.
     * The close method of the input stream will cleanup this handle, and if
     * checksums are requested, will check the checksums on close.
     * If the file is not set to be able to be transferred multiple times, it is
     * cleaned up after the transfer.
     * @return An input stream for the remote file.
     * @throws IOFailure on I/O trouble generating inputstream for remote file.
     * Also, the returned remote file will throw IOFailure on close, if
     * checksums are requested, but do not match.
     */
    public InputStream getInputStream() {
        if (filesize == 0) {
            return new ByteArrayInputStream(new byte[] {});
        }
        try {
            InputStream is = null;
            if (isLocal()) {
                is = new FileInputStream(file);
            } else {
                URLConnection urlConnection = getRegistry().openConnection(url);
                //ensure not getting some cached version
                urlConnection.setUseCaches(false);
                is = urlConnection.getInputStream();
            }
            if (useChecksums) {
                is = new DigestInputStream(is, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
            }
            return new FilterInputStream(is) {
                public void close() {
                    if (useChecksums) {
                        String newChecksum = ChecksumCalculator
                                .toHex(((DigestInputStream) in).getMessageDigest().digest());
                        if (!newChecksum.equals(checksum)) {
                            throw new IOFailure(
                                    "Checksum mismatch! Expected '" + checksum + "' but was '" + newChecksum + "'");
                        }
                    }
                    if (!multipleDownloads) {
                        cleanup();
                    }
                }
            };
        } catch (IOException e) {
            throw new IOFailure("Unable to get inputstream for '" + file + "' from '" + url + "'", e);
        }
    }

    /**
     * Invalidate all file handles, by asking the remote registry to remove the
     * url for this remote file from the list of shared files.
     * Invalidating a file handle may delete the original files, if deletable.
     * This method does not throw exceptions, but will warn on errors.
     */
    public void cleanup() {
        if (filesize == 0) {
            return;
        }
        try {
            URLConnection urlConnection = getRegistry().openConnection(getRegistry().getCleanupUrl(url));
            urlConnection.setUseCaches(false);
            urlConnection.connect();
            urlConnection.getInputStream();
        } catch (IOException e) {
            log.warn("Unable to cleanup file '" + file.getAbsolutePath() + "' with URL'" + url + "'", e);
        }
    }

    /** Get checksum for file, or null if checksums were not requested.
     * @return checksum for file, or null if checksums were not requested.
     */
    public String getChecksum() {
        return checksum;
    }

    /** Helper method to determine if file resides on local machine.
     * @return true if the file is on the local machine, false otherwise.
     */
    protected boolean isLocal() {
        return SystemUtils.getLocalHostName().equals(hostname) && file.isFile() && file.canRead();
    }

    /**
     * Retrieval of the number of retries for retrieving a file from a HTTP
     * server. 
     * TODO define a setting for HTTP retries, just like for the FTP retries.
     * @return The number of retries. Currently a constant: 1.
     */
    @Override
    public int getNumberOfRetries() {
        // TODO make settings for this. 
        return 1;
    }
}