dk.netarkivet.archive.arcrepository.ARCLookupTester.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.archive.arcrepository.ARCLookupTester.java

Source

/*
 * #%L
 * Netarchivesuite - archive - test
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.archive.arcrepository;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URI;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

//import org.archive.io.arc.ARCConstants;
import org.archive.format.arc.ARCConstants;
import org.archive.io.arc.ARCReader;
import org.archive.io.arc.ARCReaderFactory;
import org.archive.io.arc.ARCRecord;
import org.archive.io.arc.ARCRecordMetaData;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import dk.netarkivet.archive.ArchiveSettings;
import dk.netarkivet.archive.arcrepository.distribute.JMSArcRepositoryClient;
import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.distribute.ChannelsTesterHelper;
import dk.netarkivet.common.distribute.JMSConnectionMockupMQ;
import dk.netarkivet.common.distribute.arcrepository.ARCLookup;
import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClient;
import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.StreamUtils;
import dk.netarkivet.common.utils.arc.ARCKey;
import dk.netarkivet.testutils.ReflectUtils;
import dk.netarkivet.testutils.StringAsserts;
import dk.netarkivet.testutils.TestFileUtils;
import dk.netarkivet.testutils.preconfigured.ReloadSettings;

/**
 * Unit tests for {@link ARCLookup}: setting the index, looking up URIs through an arc repository client, and
 * the private {@code luceneLookup} method.
 */
@SuppressWarnings({ "unchecked" })
public class ARCLookupTester {
    /**
     * Charset used whenever these tests convert between bytes and text. ISO-8859-1 maps every byte to exactly one
     * char, so conversions are lossless and independent of the platform default charset.
     */
    private static final Charset HEADER_CHARSET = Charset.forName("ISO-8859-1");

    /** Client closed by {@link #tearDown()}; tests that swap clients store the replacement here. */
    private ViewerArcRepositoryClient realArcRepos;
    /** Lookup under test; recreated in {@link #setUp()} for every test. */
    private ARCLookup lookup;
    /** Reader opened by a test, if any; closed in {@link #tearDown()}. */
    protected ARCReader arcReader;
    ReloadSettings rs = new ReloadSettings();

    /**
     * Prepares a fresh working directory copied from the originals, switches JMS to the mock-up connection, and
     * builds an {@link ARCLookup} on top of a file-backed client with the 2-3 index installed.
     */
    @Before
    public void setUp() throws Exception {
        rs.setUp();
        dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL = new URI(
                "http://netarkivet.dk/netarchive_alm/billeder/spacer.gif");

        JMSConnectionMockupMQ.useJMSConnectionMockupMQ();
        ChannelsTesterHelper.resetChannels();

        // Although we also need some real data
        File workingDir = dk.netarkivet.archive.distribute.arcrepository.TestInfo.WORKING_DIR;

        FileUtils.removeRecursively(workingDir);
        TestFileUtils.copyDirectoryNonCVS(dk.netarkivet.archive.distribute.arcrepository.TestInfo.ORIGINALS_DIR,
                workingDir);
        FileUtils.createDir(new File(workingDir, "viewerproxy"));

        Settings.set(CommonSettings.USE_REPLICA_ID, "ONE");
        // The admin directory used to be set three times in a row; only the last value (LOG_PATH) ever took
        // effect, so it is set once here.
        Settings.set(ArchiveSettings.DIRS_ARCREPOSITORY_ADMIN,
                dk.netarkivet.archive.distribute.arcrepository.TestInfo.LOG_PATH.getAbsolutePath());

        // This is "real" in the sense that it returns records from a
        // bitarchive-like file system instead of making them up.
        realArcRepos = new LocalArcRepositoryClient(
                new File(dk.netarkivet.archive.distribute.arcrepository.TestInfo.ARCHIVE_DIR, "filedir"));

        lookup = new ARCLookup(realArcRepos);
        lookup.setIndex(dk.netarkivet.archive.distribute.arcrepository.TestInfo.INDEX_DIR_2_3);
    }

    /**
     * Closes the client and any open reader, then removes the working directory, clears the test queues and
     * restores the settings. The later steps run even if closing fails.
     */
    @After
    public void tearDown() throws Exception {
        try {
            try {
                if (realArcRepos != null) {
                    realArcRepos.close();
                }
            } finally {
                if (arcReader != null) {
                    arcReader.close();
                }
            }
        } finally {
            FileUtils.removeRecursively(dk.netarkivet.archive.distribute.arcrepository.TestInfo.WORKING_DIR);
            JMSConnectionMockupMQ.clearTestQueues();
            rs.tearDown();
        }
    }

    /** Setting a null index must be rejected with ArgumentNotValid. */
    @Test(expected = ArgumentNotValid.class)
    public void testSetNullIndex() throws Exception {
        ArcRepositoryClient arcrep = new TestArcRepositoryClient();
        ARCLookup localLookup = new ARCLookup(arcrep);
        localLookup.setIndex(null);
    }

    /** Setting an index that points at TestInfo.TMP_FILE must be rejected with ArgumentNotValid. */
    @Test(expected = ArgumentNotValid.class)
    public void testSetInvalidIndex() throws Exception {
        ArcRepositoryClient arcrep = new TestArcRepositoryClient();
        ARCLookup localLookup = new ARCLookup(arcrep);
        localLookup.setIndex(TestInfo.TMP_FILE);
    }

    /** Setting the same valid index twice in a row must not throw. */
    @Test
    public void testSetIndex() throws Exception {
        ArcRepositoryClient arcrep = new TestArcRepositoryClient();
        ARCLookup localLookup = new ARCLookup(arcrep);
        localLookup.setIndex(dk.netarkivet.archive.distribute.arcrepository.TestInfo.INDEX_DIR_2_3);
        localLookup.setIndex(dk.netarkivet.archive.distribute.arcrepository.TestInfo.INDEX_DIR_2_3);
    }

    /**
     * Test that lookup returns real arcrecord data, and that it is correct. TODO: This is really an integrity
     * test. Move and clean up!
     */
    @Test
    public void testLookup() throws Exception {
        Settings.set(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR,
                dk.netarkivet.archive.distribute.arcrepository.TestInfo.ARCHIVE_DIR.getAbsolutePath());
        Settings.set(CommonSettings.DIR_COMMONTEMPDIR,
                new File(dk.netarkivet.archive.distribute.arcrepository.TestInfo.WORKING_DIR, "serverdir")
                        .getAbsolutePath());

        // Get the data from the bitarchive
        InputStream is = lookup.lookup(dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL)
                .getInputStream();
        assertNotNull("Should be able to find image", is);
        byte[] got;
        try {
            got = readFully(is);
        } finally {
            is.close();
        }

        // Read the expected result from the "local" copy
        String filename = dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL_KEY.getFile().getName();
        File in = new File(dk.netarkivet.archive.distribute.arcrepository.TestInfo.WORKING_DIR, filename);
        arcReader = ARCReaderFactory.get(in,
                dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL_KEY.getOffset());
        ARCRecord arc = (ARCRecord) arcReader.get();
        BitarchiveRecord result;
        try {
            result = new BitarchiveRecord(arc, filename);
        } finally {
            arc.close();
        }
        byte[] wanted = StreamUtils.inputStreamToBytes(result.getData(), (int) result.getLength());

        // Compare the raw bytes: decoding binary data with the platform charset can map different byte
        // sequences to the same string and hide a mismatch.
        assertTrue("Did not get expected data: expected " + wanted.length + " bytes, got " + got.length + " bytes",
                Arrays.equals(wanted, got));
    }

    /**
     * Test that when a uri with escaped characters is looked up, the uri is urldecoded first.
     */
    @Test
    public void testLookupWithCurlyBrackets() throws Exception {
        Settings.set(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR,
                dk.netarkivet.archive.distribute.arcrepository.TestInfo.ARCHIVE_DIR.getAbsolutePath());
        Settings.set(CommonSettings.DIR_COMMONTEMPDIR,
                new File(dk.netarkivet.archive.distribute.arcrepository.TestInfo.WORKING_DIR, "serverdir")
                        .getAbsolutePath());
        Field searcherField = ARCLookup.class.getDeclaredField("luceneSearcher");
        searcherField.setAccessible(true);
        // Set the searcher to null. lookup will then throw a message with the
        // actual URI
        searcherField.set(lookup, null);
        try {
            lookup.lookup(new URI("http://www.adomain.dk/?key=%7B12345%7D"));
            fail("Should get IOFailure when lucene lookup is null");
        } catch (IOFailure e) {
            assertTrue("Expect error message to contain decoded uri but was '" + e.getMessage() + "'",
                    e.getMessage().contains("http://www.adomain.dk/?key={12345}"));
        }
    }

    /**
     * Test the private luceneLookup method directly: an unknown URL gives a null key, a known URL gives the
     * expected file name and offset.
     */
    @Test
    public void testLuceneLookup() throws NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        Method luceneLookup = ReflectUtils.getPrivateMethod(ARCLookup.class, "luceneLookup", String.class);
        ARCKey key = (ARCKey) luceneLookup.invoke(lookup, "http://foo.bar");
        assertNull("Should get null key on not found", key);

        key = (ARCKey) luceneLookup.invoke(lookup, "http://www.raeder.dk/robots.txt");
        // Fail with a readable message rather than a NullPointerException if the key is missing.
        assertNotNull("Should have found a key for known URL", key);
        assertEquals("Should have found right file",
                "2-2-20060731110420-00000-sb-test-har-001.statsbiblioteket.dk.arc", key.getFile().getName());
        assertEquals("Should have found right offset", 1941, key.getOffset());
    }

    /**
     * Test that asking for a given URL makes ARCArchiveAccess ask the right stuff in the arc repository client and
     * returns it correctly.
     *
     * @throws Exception
     */
    @Test
    public void testLookupInputStream() throws Exception {
        realArcRepos.close();
        // Hand the replacement client to the field so tearDown closes it, instead of closing the old client a
        // second time and leaving the new one open.
        realArcRepos = new TestArcRepositoryClient();
        lookup = new ARCLookup(realArcRepos);
        lookup.setIndex(dk.netarkivet.archive.distribute.arcrepository.TestInfo.INDEX_DIR_2_3);
        InputStream is = lookup.lookup(dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL)
                .getInputStream();
        String line1;
        String line2;
        String line3;
        String rest;
        try {
            // Read the status line
            line1 = readLine(is);
            // Read the Location header line
            line2 = readLine(is);
            // Read blank line
            line3 = readLine(is);
            // Read rest of content
            rest = new String(readFully(is), HEADER_CHARSET);
        } finally {
            is.close();
        }
        // The fake client encodes "<arcfile> <index>" as the record body.
        int separator = rest.indexOf(" ");
        ARCKey retKey = new ARCKey(rest.substring(0, separator), Long.parseLong(rest.substring(separator + 1)));

        assertEquals("Result should start with status code", "HTTP/1.1 200 OK", line1);
        assertEquals("Location should be set to the ARC file",
                "Location: " + dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL_KEY.getFile(),
                line2);
        assertEquals("There should be an empty line after the headers", "", line3);
        assertEquals("Should get right file for gif",
                dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL_KEY.getFile(), retKey.getFile());
        assertEquals("Should get right offset for gif",
                dk.netarkivet.archive.distribute.arcrepository.TestInfo.GIF_URL_KEY.getOffset(),
                retKey.getOffset());
    }

    /**
     * Arc repository client that reads records straight from ARC files in a local directory.
     */
    private static class LocalArcRepositoryClient extends JMSArcRepositoryClient {
        private final File fileDir;

        public LocalArcRepositoryClient(File fileDir) {
            this.fileDir = fileDir;
        }

        /**
         * Reads the record at the given offset of the named file in the local directory.
         *
         * @param arcFile name of the ARC file, relative to the directory given at construction
         * @param index offset of the record in the file
         * @return the record wrapped as a BitarchiveRecord; the test is failed if the file cannot be read
         */
        public BitarchiveRecord get(String arcFile, long index) {
            try {
                ARCReader reader = ARCReaderFactory.get(new File(fileDir, arcFile), index);
                try {
                    ARCRecord record = (ARCRecord) reader.get();
                    return new BitarchiveRecord(record, arcFile);
                } finally {
                    // testLookup uses a BitarchiveRecord after closing its source record, so the reader can be
                    // released here rather than leaked.
                    reader.close();
                }
            } catch (IOException e) {
                fail("Can't find file " + arcFile + ": " + e);
                return null;
            }
        }
    }

    /**
     * Fake arc repository client which on get returns a fake record which is ok.
     */
    private static class TestArcRepositoryClient extends JMSArcRepositoryClient {
        public TestArcRepositoryClient() {
            super();
        }

        /**
         * Returns an OK BitarchiveRecord. Content is simply arcfile name and index encoded in a stream.
         */
        public BitarchiveRecord get(String arcFile, long index) {
            final Map<String, Object> metadata = new HashMap<String, Object>();
            for (String headerField : (List<String>) ARCConstants.REQUIRED_VERSION_1_HEADER_FIELDS) {
                metadata.put(headerField, "");
            }
            metadata.put(ARCConstants.ABSOLUTE_OFFSET_KEY, Long.valueOf(0L)); // Dummy offset
            byte[] data = ("HTTP/1.1 200 OK\nLocation: " + arcFile + "\n\n" + arcFile + " " + index)
                    .getBytes(HEADER_CHARSET);
            // TODO replace this by something else, or remove ?
            // (ARCConstants.LENGTH_HEADER_FIELD_KEY)
            // does not exist in Heritrix 1.10+
            // metadata.put(ARCConstants.LENGTH_HEADER_FIELD_KEY,
            // Integer.toString(data.length));
            // Note: ARCRecordMetadata.getLength() now reads the contents of the
            // LENGTH_FIELD_KEY
            // instead of LENGTH_HEADER_FIELD_KEY
            metadata.put(ARCConstants.LENGTH_FIELD_KEY, Integer.toString(data.length));
            try {
                ARCRecordMetaData meta = new ARCRecordMetaData(arcFile, metadata);
                return new BitarchiveRecord(new ARCRecord(new ByteArrayInputStream(data), meta), arcFile);
            } catch (IOException e) {
                fail("Cant't create metadata record");
                return null;
            }
        }

    }

    /**
     * Reads everything that remains in a stream.
     *
     * @param is the stream to drain; it is not closed
     * @return all bytes read until end of stream, possibly an empty array
     * @throws IOException on trouble reading from the stream
     */
    private static byte[] readFully(InputStream is) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int count;
        while ((count = is.read(buffer)) != -1) {
            baos.write(buffer, 0, count);
        }
        return baos.toByteArray();
    }

    /**
     * Read a line of bytes from an InputStream. Useful when an InputStream may contain both text and binary data.
     *
     * @param inputStream A source of data
     * @return A line of text read from inputStream, with terminating \r\n or \n removed, or null if no data is
     * available.
     * @throws IOException on trouble reading from input stream
     */
    private static String readLine(InputStream inputStream) throws IOException {
        byte[] rawdata = readRawLine(inputStream);
        if (rawdata == null) {
            return null;
        }
        int len = rawdata.length;
        // Strip a trailing \n, and a \r only when it directly precedes that \n.
        if (len > 0 && rawdata[len - 1] == '\n') {
            len--;
            if (len > 0 && rawdata[len - 1] == '\r') {
                len--;
            }
        }
        return new String(rawdata, 0, len, HEADER_CHARSET);
    }

    /**
     * Reads a raw line from an InputStream, up till \n. Since HTTP allows \r\n and \n as terminators, this gets the
     * whole line. This code is adapted from org.apache.commons.httpclient.HttpParser
     *
     * @param inputStream A stream to read from.
     * @return Array of bytes read (including the terminating \n, if one was found) or null if none are available.
     * @throws IOException if the underlying reads fail
     */
    private static byte[] readRawLine(InputStream inputStream) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        int ch;
        while ((ch = inputStream.read()) >= 0) {
            buf.write(ch);
            if (ch == '\n') {
                break;
            }
        }
        if (buf.size() == 0) {
            return null;
        }
        return buf.toByteArray();
    }
}