dk.netarkivet.harvester.harvesting.metadata.MetadataFileWriterTester.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.harvester.harvesting.metadata.MetadataFileWriterTester.java

Source

/*
 * #%L
 * Netarchivesuite - harvester - test
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.harvester.harvesting.metadata;

import java.io.File;
import java.io.IOException;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.jwat.common.ANVLRecord;

import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.SlowTest;
import dk.netarkivet.testutils.TestResourceUtils;

/**
 * Tests that exercise the ARC and WARC flavours of {@link MetadataFileWriter} by writing a random
 * binary payload and the files found in the {@code crawldir/logs} test resource directory.
 * <p>
 * Every test gets its own freshly created working directory below
 * {@link TestResourceUtils#OUTPUT_DIR}, named after the test class and the running test method.
 * All writers are closed in {@code finally} blocks so that a failing write does not leave an
 * open writer behind.
 */
public class MetadataFileWriterTester {

    /** URI under which the random payload record is written. */
    private static final String PAYLOAD_URI = "http://www.netarkivet.dk/";

    /** Size in bytes of the random payload record. */
    private static final int PAYLOAD_SIZE = 8192;

    /** Number of temporary files written by the load test. */
    private static final int LOAD_TEST_FILE_COUNT = 10000;

    /** Number of times the text fragment is repeated in each load test file. */
    private static final int LOAD_TEST_REPEAT_COUNT = 5000;

    /** Gives access to the name of the running test method. */
    @Rule
    public TestName test = new TestName();

    /** Test resource directory whose files are written as metadata records. */
    private static File logsDir = TestResourceUtils.getFile("crawldir/logs");

    /** Per-test output directory; (re)created by {@link #initialize()}. */
    private File WORKING_DIR;

    /**
     * Creates an empty working directory for the current test and fails early if the
     * {@code crawldir/logs} test resource directory is not available.
     */
    @Before
    public void initialize() {
        WORKING_DIR = new File(TestResourceUtils.OUTPUT_DIR,
                getClass().getSimpleName() + "/" + test.getMethodName());
        FileUtils.removeRecursively(WORKING_DIR);
        FileUtils.createDir(WORKING_DIR);
        if (!logsDir.exists() || !logsDir.isDirectory()) {
            Assert.fail("Test resource directory missing 'crawldir/logs'!");
        }
    }

    /**
     * Writes a random payload to one ARC metadata file and all log files to a second one.
     *
     * @throws IOException if writing to or closing one of the files fails
     */
    @Test
    public void testMetadataFileWriterArc() throws IOException {
        File metafile = getOutputArcFile("metadata.arc");
        metafile.deleteOnExit();
        MetadataFileWriter mdfw = MetadataFileWriterArc.createWriter(metafile);
        try {
            writeRandomPayload(mdfw);
        } finally {
            mdfw.close();
        }

        File metadataArcFile = getOutputArcFile("42-metadata-1.arc");
        MetadataFileWriter mfwa = MetadataFileWriterArc.createWriter(metadataArcFile);
        try {
            writeLogFiles(mfwa);
        } finally {
            mfwa.close();
        }
    }

    /**
     * Writes a random payload to one WARC metadata file and, after inserting an empty info
     * record, all log files to a second one.
     *
     * @throws IOException if writing to or closing one of the files fails
     */
    @Test
    public void testMetadataFileWriterWarc() throws IOException {
        File metafile = getOutputArcFile("metadata.warc");
        metafile.deleteOnExit();
        MetadataFileWriter mdfw = MetadataFileWriterWarc.createWriter(metafile);
        try {
            writeRandomPayload(mdfw);
        } finally {
            mdfw.close();
        }

        File metadataArcFile = getOutputArcFile("42-metadata-1.warc");
        MetadataFileWriter mfwa = MetadataFileWriterWarc.createWriter(metadataArcFile);
        try {
            ((MetadataFileWriterWarc) mfwa).insertInfoRecord(new ANVLRecord());
            writeLogFiles(mfwa);
        } finally {
            mfwa.close();
        }
    }

    /**
     * Load test that writes {@value #LOAD_TEST_FILE_COUNT} temporary files to a single WARC
     * metadata file. It is tagged with the {@link SlowTest} category because it takes a long
     * time to complete (about 15 seconds according to the original note).
     *
     * @throws IOException if creating the temporary files or writing the WARC file fails
     */
    @Category(SlowTest.class)
    @Test
    public void notestMetadataFileWriterWarcMassiveLoadTest() throws IOException {
        // TODO verify content of produced warc-file to ensure that all is OK
        File metafile = getOutputArcFile("metadata.warc");
        MetadataFileWriterWarc mdfw = (MetadataFileWriterWarc) MetadataFileWriterWarc.createWriter(metafile);
        Set<File> files = new HashSet<>();
        try {
            mdfw.insertInfoRecord(new ANVLRecord());

            // Every temporary file gets the same content: the fragment repeated many times.
            String contentPart = "blablabla";
            String someText = StringUtils.repeat(contentPart, LOAD_TEST_REPEAT_COUNT);
            List<String> textArray = new ArrayList<String>();
            textArray.add(someText);

            for (int i = 0; i < LOAD_TEST_FILE_COUNT; i++) {
                File f = File.createTempFile("metadata", "cdx");
                // Register the file before writing to it so it is cleaned up even if the write fails.
                files.add(f);
                FileUtils.writeCollectionToFile(f, textArray);
            }

            int count = 0;
            for (File f : files) {
                mdfw.writeFileTo(f, "http://netarkivet/ressource-" + count, "text/plain");
                count++;
            }
        } finally {
            try {
                mdfw.close();
            } finally {
                // Best-effort cleanup; a temporary file that cannot be deleted is not a test failure.
                for (File f : files) {
                    f.delete();
                }
            }
        }
    }

    /**
     * Writes one record of {@value #PAYLOAD_SIZE} random bytes to the given writer.
     * The caller remains responsible for closing the writer.
     *
     * @param writer the writer that receives the record
     * @throws IOException if the writer fails to write the record
     */
    private void writeRandomPayload(MetadataFileWriter writer) throws IOException {
        long ctm = System.currentTimeMillis();
        byte[] payload = new byte[PAYLOAD_SIZE];
        new SecureRandom().nextBytes(payload);
        writer.write(PAYLOAD_URI, "application/binary", "127.0.0.1", ctm, payload);
    }

    /**
     * Writes every file in the {@code crawldir/logs} test resource directory to the given writer.
     * The caller remains responsible for closing the writer.
     *
     * @param writer the writer that receives the files
     * @throws IOException if the writer fails to write one of the files
     */
    private void writeLogFiles(MetadataFileWriter writer) throws IOException {
        File[] logFiles = logsDir.listFiles();
        // listFiles() returns null when the directory cannot be read; fail clearly instead of with an NPE.
        Assert.assertNotNull("Unable to list files in test resource directory 'crawldir/logs'", logFiles);
        for (File f : logFiles) {
            writer.writeFileTo(f, "metadata://netarkivet.dk/crawl/logs/" + f.getName(), "text/plain");
        }
    }

    /**
     * Returns a file with the given name in the working directory of the current test,
     * attempting to create it as an empty file first.
     *
     * @param name the file name, relative to the working directory
     * @return the file below the working directory
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be created
     */
    private File getOutputArcFile(String name) {
        File arcfile = new File(WORKING_DIR, name);
        try {
            arcfile.createNewFile();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return arcfile;
    }

}