dk.netarkivet.harvester.harvesting.metadata.MetadataFileWriterArc.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.harvester.harvesting.metadata.MetadataFileWriterArc.java

Source

/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.harvesting.metadata;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;

import org.apache.commons.io.IOUtils;
import org.jwat.arc.ArcFileNaming;
import org.jwat.arc.ArcFileNamingSingleFile;
import org.jwat.arc.ArcFileWriter;
import org.jwat.arc.ArcFileWriterConfig;
import org.jwat.arc.ArcHeader;
import org.jwat.arc.ArcRecord;
import org.jwat.arc.ArcRecordBase;
import org.jwat.arc.ArcVersion;
import org.jwat.arc.ArcVersionBlock;
import org.jwat.arc.ArcVersionHeader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.SystemUtils;

/**
 * MetadataFileWriter that writes to ARC files.
 * <p>
 * Instances are obtained through {@link #createWriter(File)}, which opens the underlying JWAT
 * {@link ArcFileWriter} and writes the initial ARC version block. After {@link #close()} has been called the
 * internal writer reference is cleared, so {@link #getFile()} and the write methods will fail with a
 * {@link NullPointerException} if used on a closed instance.
 */
public class MetadataFileWriterArc extends MetadataFileWriter {

    private static final Logger log = LoggerFactory.getLogger(MetadataFileWriterArc.class);

    /** Writer to this job's metadata file. Set to <code>null</code> once {@link #close()} has run. */
    private ArcFileWriter writer = null;

    /**
     * Create a <code>MetadataFileWriter</code> for ARC output. The returned writer has already been opened and
     * has had the ARC version block written to it.
     *
     * @param metadataARCFile The metadata ARC <code>File</code>
     * @return <code>MetadataFileWriter</code> for writing metadata files in ARC
     * @throws IOFailure if the ARC file cannot be opened or the version block cannot be written
     */
    public static MetadataFileWriter createWriter(File metadataARCFile) {
        MetadataFileWriterArc mtfw = new MetadataFileWriterArc();
        ArcFileNaming naming = new ArcFileNamingSingleFile(metadataARCFile);
        // NOTE(review): the positional arguments are presumably (targetDir, compression=false,
        // maxFileSize=unlimited, overwrite=true) -- confirm against the JWAT ArcFileWriterConfig in use.
        ArcFileWriterConfig config = new ArcFileWriterConfig(metadataARCFile.getParentFile(), false, Long.MAX_VALUE,
                true);
        mtfw.writer = ArcFileWriter.getArcWriterInstance(naming, config);
        mtfw.open();
        return mtfw;
    }

    /**
     * Opens the underlying ARC writer and writes the leading version block record. The record is given a
     * <code>filedesc://</code> URL built from the name of the file being written, the IP address
     * <code>0.0.0.0</code>, the current time as archive date and the content type <code>text/plain</code>.
     *
     * @throws IOFailure if opening the writer or writing the version block fails
     */
    protected void open() {
        try {
            writer.open();
            ArcVersionHeader versionHeader = ArcVersionHeader.create(ArcVersion.VERSION_1, "InternetArchive");
            versionHeader.rebuild();
            byte[] versionHeaderBytes = versionHeader.getHeader();
            ArcRecordBase record = ArcVersionBlock.createRecord(writer.writer);
            record.header.recordFieldVersion = 1;
            record.header.urlStr = "filedesc://" + writer.getFile().getName();
            record.header.ipAddressStr = "0.0.0.0";
            record.header.archiveDate = new Date();
            record.header.contentTypeStr = "text/plain";
            // Long.valueOf replaces the deprecated (for removal) boxing constructor new Long(...).
            record.header.archiveLength = Long.valueOf(versionHeaderBytes.length);
            writer.writer.writeHeader(record);
            writer.writer.writePayload(versionHeaderBytes);
            writer.writer.closeRecord();
        } catch (IOException e) {
            throw new IOFailure("Error opening MetadataFileWriterArc", e);
        }
    }

    /**
     * Closes the underlying ARC writer, if it has not been closed already. Calling this method more than once
     * is harmless. If closing fails, the writer reference is kept so that the call may be retried.
     *
     * @throws IOFailure if the underlying writer fails to close
     */
    @Override
    public void close() {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                throw new IOFailure("Error closing MetadataFileWriterArc", e);
            }
            writer = null;
        }
    }

    /**
     * Returns the file reported by the underlying ARC writer.
     *
     * @return the file the underlying ARC writer is writing to
     * @throws NullPointerException if this writer has been closed
     */
    @Override
    public File getFile() {
        return writer.getFile();
    }

    /**
     * Writes a file to the ARC file. Delegates to {@link #writeTo(File, String, String)} and discards its
     * return value.
     *
     * @param file the File to archive
     * @param uri the URL with which it is stored in the arcfile
     * @param mime The mimetype of the File-contents
     * @throws IOFailure if <code>file</code> is not a regular file or cannot be written to the ARC file
     */
    @Override
    public void writeFileTo(File file, String uri, String mime) {
        writeTo(file, uri, mime);
    }

    /**
     * Writes a File as a single record to the ARC file. The record's archive date is the last-modified time of
     * the file and its IP address is the value returned by <code>SystemUtils.getLocalIP()</code>.
     *
     * @param fileToArchive the File to archive
     * @param URL the URL with which it is stored in the arcfile
     * @param mimetype The mimetype of the File-contents
     * @return always <code>true</code>; every failure is reported by throwing an exception instead
     * @throws IOFailure if <code>fileToArchive</code> is not a regular file, cannot be opened, or an I/O error
     *         occurs while writing the record
     */
    @Override
    public boolean writeTo(File fileToArchive, String URL, String mimetype) {
        if (!fileToArchive.isFile()) {
            throw new IOFailure("Not a file: " + fileToArchive.getPath());
        }
        // The second argument is the size of the source file, not the name of the ARC file.
        log.info("Writing file '{}' ({} bytes) to ARC file", fileToArchive, fileToArchive.length());
        InputStream in = null;
        try {
            ArcRecordBase record = ArcRecord.createRecord(writer.writer);
            ArcHeader header = record.header;
            header.urlStr = URL;
            header.archiveDate = new Date(fileToArchive.lastModified());
            header.ipAddressStr = SystemUtils.getLocalIP();
            header.contentTypeStr = mimetype;
            header.archiveLength = fileToArchive.length();
            in = new FileInputStream(fileToArchive);
            writer.writer.writeHeader(record);
            writer.writer.streamPayload(in);
            writer.writer.closeRecord();
        } catch (FileNotFoundException e) {
            throw new IOFailure("Unable to open file: " + fileToArchive.getPath(), e);
        } catch (IOException e) {
            throw new IOFailure("Epic IO fail while writing to ARC file: " + fileToArchive.getPath(), e);
        } finally {
            IOUtils.closeQuietly(in);
        }
        return true;
    }

    /**
     * Writes an in-memory payload as a single record to the ARC file.
     * <p>
     * Although the signature declares <code>IOException</code>, any <code>IOException</code> raised while
     * writing the record is caught here and rethrown wrapped in an {@link IOFailure}.
     *
     * @param uri the URL with which the record is stored in the arcfile
     * @param contentType the content type stored in the record header
     * @param hostIP the IP address stored in the record header
     * @param fetchBeginTimeStamp the archive date of the record, in milliseconds since the epoch
     * @param payload the bytes making up the record payload
     * @throws IOFailure if an I/O error occurs while writing the record
     * @throws IOException declared by the signature; not thrown directly by this implementation
     */
    @Override
    public void write(String uri, String contentType, String hostIP, long fetchBeginTimeStamp, byte[] payload)
            throws IOException {
        ByteArrayInputStream in = null;
        try {
            ArcRecordBase record = ArcRecord.createRecord(writer.writer);
            ArcHeader header = record.header;
            header.urlStr = uri;
            header.archiveDate = new Date(fetchBeginTimeStamp);
            header.ipAddressStr = hostIP;
            // Long.valueOf replaces the deprecated (for removal) boxing constructor new Long(...).
            header.archiveLength = Long.valueOf(payload.length);
            header.contentTypeStr = contentType;
            in = new ByteArrayInputStream(payload);
            writer.writer.writeHeader(record);
            writer.writer.streamPayload(in);
            writer.writer.closeRecord();
        } catch (IOException e) {
            throw new IOFailure("Epic IO fail while writing payload to ARC file.", e);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

}