dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord.java

Source

/* $Id$
 * $Date$
 * $Revision$
 * $Author$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package dk.netarkivet.common.distribute.arcrepository;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.archive.io.ArchiveRecord;
import org.archive.io.arc.ARCRecord;
import org.archive.io.warc.WARCRecord;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.distribute.RemoteFile;
import dk.netarkivet.common.distribute.RemoteFileFactory;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.arc.ARCUtils;
import dk.netarkivet.common.utils.warc.WARCUtils;

/**
 * Class to hold the result of a lookup operation in the bitarchive:
 *    The metadata information associated with the record
 *    The actual byte content
 *    The name of the file the data were retrieved from
 *    If length of record exceeds value of
 *    Settings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE
 *    The record is stored in a RemoteFile.
 *
 */
public class BitarchiveRecord implements Serializable {

    /** The file the data were retrieved from. */
    private String fileName;

    /** The actual data. */
    private byte[] objectBuffer;

    /** The offset of the ArchiveRecord contained. */
    private long offset;

    /** The length of the ArchiveRecord contained. */
    private long length;

    /** The actual data as a remote file.*/
    private RemoteFile objectAsRemoteFile;

    /** Is the data stored in a RemoteFile. */
    private boolean isStoredAsRemoteFile = false;

    /** Set after deleting RemoteFile. */
    private boolean hasRemoteFileBeenDeleted = false;

    /** How large the ARCRecord can before saving as RemoteFile. */
    private final long LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER = Settings
            .getLong(CommonSettings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE);

    /** the log. */
    private static final transient Log log = LogFactory.getLog(BitarchiveRecord.class.getName());

    /**
     * Creates a BitarchiveRecord from the a ArchiveRecord, which can be either
     * a ARCRecord or WARCRecord. Note that record metadata is not included with
     * the BitarchiveRecord, only the payload of the record.
     * 
     * If the length of the record is higher than Settings
     * .BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE
     *  the data is stored in a RemoteFile, otherwise the data is stored in
     *  a byte array.
     * @param record the ArchiveRecord that the data should come from.  We do not
     * close the ArchiveRecord.
     * @param filename The filename of the ArchiveFile
    */
    public BitarchiveRecord(ArchiveRecord record, String filename) {
        ArgumentNotValid.checkNotNull(record, "ArchiveRecord record");
        ArgumentNotValid.checkNotNull(filename, "String filename");
        this.fileName = filename;
        this.offset = record.getHeader().getOffset();
        if (record instanceof ARCRecord) {
            length = record.getHeader().getLength();
        } else if (record instanceof WARCRecord) {
            // The length of the payload of the warc-record is not getLength(),
            // but getLength minus getContentBegin(), which is the number of
            // bytes used for the record-header!
            length = record.getHeader().getLength() - record.getHeader().getContentBegin();
        } else {
            throw new ArgumentNotValid("Unknown type of ArchiveRecord");
        }
        if (length > LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER) {
            // copy arc-data to local file and create a RemoteFile based on this
            log.info("Record exceeds limit of " + LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER + " bytes. Length is "
                    + length + " bytes, Storing as instance of " + Settings.get(CommonSettings.REMOTE_FILE_CLASS));
            if (RemoteFileFactory.isExtendedRemoteFile()) {
                objectAsRemoteFile = RemoteFileFactory.getExtendedInstance(record);
                isStoredAsRemoteFile = true;
            } else {
                File localTmpFile = null;
                try {
                    localTmpFile = File.createTempFile("BitarchiveRecord-" + fileName, ".tmp",
                            FileUtils.getTempDir());
                    record.dump(new FileOutputStream(localTmpFile));
                    objectAsRemoteFile = RemoteFileFactory.getMovefileInstance(localTmpFile);
                    isStoredAsRemoteFile = true;
                } catch (IOException e) {
                    throw new IOFailure("Unable to store record(" + fileName + "," + offset + ") as remotefile", e);
                }
            }
        } else { // Store data in objectbuffer
            try {
                if (record instanceof ARCRecord) {
                    objectBuffer = ARCUtils.readARCRecord((ARCRecord) record);
                } else if (record instanceof WARCRecord) {
                    objectBuffer = WARCUtils.readWARCRecord((WARCRecord) record);
                }
                log.debug("Bytes stored in objectBuffer: " + objectBuffer.length);
            } catch (IOException e) {
                throw new ExceptionInInitializerError(e);
            }
        }
    }

    /**
     * Returns the file that this information was loaded from.
     * @return the file that this ARC record comes from.
     */
    public String getFile() {
        return fileName;
    }

    /**
     * Returns the length of the ARCRecord contained.
     * @return the length of the ARCRecord contained
     */
    public long getLength() {
        return length;
    }

    /**
     * Retrieve the data in the record.
     * If data is in RemoteFile, this operation deletes the RemoteFile.
     * @throws IllegalState if remotefile already deleted
     * @return the data from the ARCRecord as an InputStream.
     */
    public InputStream getData() {
        InputStream result = null;
        if (isStoredAsRemoteFile) {
            if (hasRemoteFileBeenDeleted) {
                throw new IllegalState("RemoteFile has already been deleted");
            }
            log.info("Reading " + length + " bytes from RemoteFile");
            InputStream rfInputStream = objectAsRemoteFile.getInputStream();
            result = new FilterInputStream(rfInputStream) {
                public void close() throws IOException {
                    super.close();
                    objectAsRemoteFile.cleanup();
                    hasRemoteFileBeenDeleted = true;
                }
            };
        } else {
            log.debug("Reading " + length + " bytes from objectBuffer");
            result = new ByteArrayInputStream(objectBuffer);
        }
        return result;
    }

    /**
     * Deliver the data in the record to a given OutputStream.
     * If data is in RemoteFile, this operation deletes the RemoteFile
     * @param out deliver the data to this outputstream
     * @throws IOFailure
     *             if any IOException occurs reading or writing the data
     * @throws IllegalState if remotefile already deleted
     */
    public void getData(OutputStream out) {
        ArgumentNotValid.checkNotNull(out, "OutputStream out");
        if (isStoredAsRemoteFile) {
            if (hasRemoteFileBeenDeleted) {
                throw new IllegalState("RemoteFile has already been deleted");
            }
            try {
                log.debug("Reading " + length + " bytes from RemoteFile");
                objectAsRemoteFile.appendTo(out);
            } finally {
                log.trace("Deleting the RemoteFile '" + objectAsRemoteFile.getName() + "'.");
                objectAsRemoteFile.cleanup();
                hasRemoteFileBeenDeleted = true;
            }
        } else {
            try {
                log.debug("Reading " + length + " bytes from objectBuffer");
                out.write(objectBuffer, 0, objectBuffer.length);
            } catch (IOException e) {
                throw new IOFailure("Unable to write data from " + "objectBuffer to the outputstream", e);
            }
        }
    }
}