org.apache.tika.parser.mock.MockParserTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tika.parser.mock.MockParserTest.java

Source

package org.apache.tika.parser.mock;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.Date;

import org.apache.commons.io.IOUtils;
import org.apache.tika.TikaTest;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.junit.Test;

/**
 * Tests that drive the mock parser through {@link AutoDetectParser} using the
 * documents under {@code /test-documents/mock/}.
 * <p>
 * Each test parses one mock document and checks the behaviour that document is
 * expected to trigger (writing to stdout/stderr, throwing, sleeping, ...), and
 * verifies via the {@code X-Parsed-By} metadata that the mock parser was
 * actually the parser that handled the document.
 */
public class MockParserTest extends TikaTest {
    /** Resource directory that is prepended to every path handed to {@link #getXML(String, Metadata)}. */
    private static final String M = "/test-documents/mock/";

    /**
     * Shared parser instance. It is created statically so that the threaded
     * tests do not pay the parser instantiation cost after the worker thread starts.
     */
    private static final Parser PARSER = new AutoDetectParser();

    /**
     * Parses a mock test document with the shared {@link #PARSER}.
     *
     * @param path file name relative to {@link #M} (NOT a full resource path)
     * @param m metadata object that is populated during the parse
     * @return the result returned by {@code super.getXML(InputStream, Parser, Metadata)}
     * @throws Exception whatever the underlying parse throws
     */
    @Override
    public XMLResult getXML(String path, Metadata m) throws Exception {
        //note that this is specific to MockParserTest with addition of M to the path!
        InputStream is = getResourceAsStream(M + path);
        try {
            return super.getXML(is, PARSER, m);
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Parses example.xml with System.out/System.err redirected and checks that
     * the expected IOException is thrown and that both streams were written to.
     */
    @Test
    public void testExample() throws Exception {
        Metadata m = new Metadata();
        PrintStream out = System.out;
        PrintStream err = System.err;
        ByteArrayOutputStream outBos = new ByteArrayOutputStream();
        ByteArrayOutputStream errBos = new ByteArrayOutputStream();
        try (PrintStream tmpOut = new PrintStream(outBos, true, UTF_8.name());
             PrintStream tmpErr = new PrintStream(errBos, true, UTF_8.name())) {
            System.setOut(tmpOut);
            System.setErr(tmpErr);
            try {
                assertThrowable("example.xml", m, IOException.class, "not another IOException");
                assertMockParser(m);
            } finally {
                //always restore the real streams, even if an assertion failed
                System.setOut(out);
                System.setErr(err);
            }
        }
        String outString = new String(outBos.toByteArray(), UTF_8);
        assertContains("writing to System.out", outString);

        String errString = new String(errBos.toByteArray(), UTF_8);
        assertContains("writing to System.err", errString);
    }

    /** Parses a document that should not trigger any failure and checks metadata and content. */
    @Test
    public void testNothingBad() throws Exception {
        Metadata m = new Metadata();
        String content = getXML("nothing_bad.xml", m).xml;
        assertEquals("Geoffrey Chaucer", m.get("author"));
        assertContains("<p>And bathed every veyne in swich licour,</p>", content);
        assertMockParser(m);
    }

    /** Expects a NullPointerException carrying a specific message. */
    @Test
    public void testNullPointer() throws Exception {
        Metadata m = new Metadata();
        assertThrowable("null_pointer.xml", m, NullPointerException.class, "another null pointer exception");
        assertMockParser(m);
    }

    /** Expects a NullPointerException; the message is deliberately not checked. */
    @Test
    public void testNullPointerNoMsg() throws Exception {
        Metadata m = new Metadata();
        assertThrowable("null_pointer_no_msg.xml", m, NullPointerException.class, null);
        assertMockParser(m);
    }

    /** Checks that parsing sleep.xml takes a noticeable amount of time. */
    @Test
    public void testSleep() throws Exception {
        long start = System.currentTimeMillis();
        Metadata m = new Metadata();
        getXML("sleep.xml", m);
        long elapsed = System.currentTimeMillis() - start;
        //should sleep for at least 3000; 2000 is used as a lenient lower bound
        boolean enoughTimeHasElapsed = elapsed > 2000;
        assertTrue("not enough time has elapsed: " + elapsed, enoughTimeHasElapsed);
        assertMockParser(m);
    }

    /** Checks that parsing heavy_hang.xml takes a noticeable amount of time. */
    @Test
    public void testHeavyHang() throws Exception {
        long start = System.currentTimeMillis();
        Metadata m = new Metadata();
        getXML("heavy_hang.xml", m);
        long elapsed = System.currentTimeMillis() - start;
        //should sleep for at least 3000; 2000 is used as a lenient lower bound
        boolean enoughTimeHasElapsed = elapsed > 2000;
        assertTrue("not enough time has elapsed: " + elapsed, enoughTimeHasElapsed);
        assertMockParser(m);
    }

    /** Expects an OutOfMemoryError with the message used by fake_oom.xml. */
    @Test
    public void testFakeOOM() throws Exception {
        Metadata m = new Metadata();
        assertThrowable("fake_oom.xml", m, OutOfMemoryError.class, "not another oom");
        assertMockParser(m);
    }

    /** Expects an OutOfMemoryError with the JVM's "Java heap space" message. */
    @Test
    public void testRealOOM() throws Exception {
        //Note: we're not actually testing the diff between fake and real oom
        //i.e. by creating child process and setting different -Xmx or
        //memory profiling.
        Metadata m = new Metadata();
        assertThrowable("real_oom.xml", m, OutOfMemoryError.class, "Java heap space");
        assertMockParser(m);
    }

    /** An interrupt should cut the parse short, well before the configured sleep is over. */
    @Test
    public void testInterruptibleSleep() {
        //Without static initialization of the parser, it can take ~1 second after t.start()
        //before the parser actually calls parse.  This is
        //just the time it takes to instantiate and call AutoDetectParser, do the detection, etc.
        //This is not thread creation overhead.
        long elapsed = parseInThreadAndInterrupt("sleep_interruptible.xml", 10000);
        boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
        assertTrue("elapsed (" + elapsed + " millis) was not short enough", shortEnough);
    }

    /** An interrupt should be ignored, so the parse must run for the full configured sleep. */
    @Test
    public void testNonInterruptibleSleep() {
        long elapsed = parseInThreadAndInterrupt("sleep_not_interruptible.xml", 20000);
        boolean longEnough = elapsed >= 3000;//the xml file specifies 3000, this sleeps 1000
        assertTrue("elapsed (" + elapsed + " millis) was not long enough", longEnough);
    }

    /**
     * Parses {@code path} on a separate thread, interrupts that thread after
     * one second, waits for it to finish and reports how long the whole
     * sequence took.
     * <p>
     * Fails the calling test if the test thread itself is interrupted, if the
     * worker is still running after {@code joinTimeoutMillis}, or if the worker
     * recorded a failure (an exception from the parse or a failed assertion).
     *
     * @param path file name relative to {@link #M}
     * @param joinTimeoutMillis maximum time to wait for the worker after interrupting it
     * @return elapsed wall-clock milliseconds, measured from just before the worker is started
     */
    private long parseInThreadAndInterrupt(String path, long joinTimeoutMillis) {
        ParserRunnable runnable = new ParserRunnable(path);
        Thread worker = new Thread(runnable);
        //start timing before the worker starts so that the measured time
        //always covers the whole parse
        long start = System.currentTimeMillis();
        worker.start();
        try {
            //make sure that the thread has actually started
            Thread.sleep(1000);
            worker.interrupt();
            worker.join(joinTimeoutMillis);
        } catch (InterruptedException e) {
            //don't leave the worker sleeping, and don't lose the interrupt
            worker.interrupt();
            Thread.currentThread().interrupt();
            fail("test thread was interrupted while waiting for the parser thread");
        }
        long elapsed = System.currentTimeMillis() - start;

        assertTrue("parser thread did not finish within " + joinTimeoutMillis + " millis",
                !worker.isAlive());
        Throwable failure = runnable.getFailure();
        if (failure != null) {
            //assertion failures on the worker thread are invisible to JUnit
            //unless they are rethrown on the test thread
            throw new AssertionError("parser thread failed: " + failure, failure);
        }
        return elapsed;
    }

    /**
     * Parses a single mock document and remembers any failure so that the test
     * thread can report it; exceptions and assertion errors thrown on a worker
     * thread would otherwise never reach JUnit.
     */
    private class ParserRunnable implements Runnable {
        private final String path;
        //written by the worker thread, read by the test thread
        private volatile Throwable failure;

        ParserRunnable(String path) {
            this.path = path;
        }

        /** @return the throwable that ended the parse or failed the mock-parser check, or null */
        Throwable getFailure() {
            return failure;
        }

        @Override
        public void run() {
            Metadata m = new Metadata();
            try {
                getXML(path, m);
                assertMockParser(m);
            } catch (Throwable t) {
                failure = t;
            }
        }
    }

    /**
     * Parses {@code path} and asserts that the parse fails with the expected throwable.
     * <p>
     * If the parse throws a {@link TikaException} that has a cause, the cause is
     * what gets checked. The test fails if nothing is thrown at all.
     *
     * @param path file name relative to {@link #M}
     * @param m metadata object that is populated during the parse
     * @param expected type the thrown (or unwrapped) throwable must be an instance of
     * @param message expected {@code getMessage()} value, or null to skip the message check
     */
    private void assertThrowable(String path, Metadata m, Class<? extends Throwable> expected, String message) {
        //capture first, assert afterwards: assertions made inside the try
        //would themselves be caught by catch (Throwable)
        Throwable thrown = null;
        try {
            getXML(path, m);
        } catch (Throwable t) {
            thrown = t;
        }
        if (thrown == null) {
            fail("expected " + expected + " but nothing was thrown while parsing " + path);
        }
        //if this is a throwable wrapped in a TikaException, use the cause
        if (thrown instanceof TikaException && thrown.getCause() != null) {
            thrown = thrown.getCause();
        }
        if (!expected.isInstance(thrown)) {
            fail("expected " + expected + " but got " + thrown);
        }
        if (message != null) {
            assertEquals(message, thrown.getMessage());
        }
    }

    /**
     * Asserts that one of the {@code X-Parsed-By} metadata values names the mock parser.
     *
     * @param m metadata populated by a parse
     */
    private void assertMockParser(Metadata m) {
        String[] parsers = m.getValues("X-Parsed-By");
        //make sure that it was actually parsed by mock.
        boolean parsedByMock = false;
        for (String parser : parsers) {
            if (parser.equals("org.apache.tika.parser.mock.MockParser")) {
                parsedByMock = true;
                break;
            }
        }
        assertTrue("mock parser should have been called", parsedByMock);
    }
}