Java tutorial
/* * This file is part of the Heritrix web crawler (crawler.archive.org). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.state; import java.io.File; import java.io.IOException; import java.io.Serializable; import java.util.Arrays; import org.apache.commons.httpclient.URIException; import org.apache.commons.lang.SerializationUtils; import org.archive.modules.CrawlURI; import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.archive.util.Recorder; import org.archive.util.TmpDirTestCase; import junit.framework.TestCase; /** * Base class for unit testing Module implementations. * * @author pjack */ public abstract class ModuleTestBase extends TestCase { /** * Magical constructor that attempts to auto-create static key field * descriptions for your module class. * * <p>If {@link #getSourceCodeDir} and {@link #getResourceDir} both return * non-null values, then the constructor will look in the resources * directory for an English resource file for the class. If it finds * one, nothing magical happens. * * <p>Otherwise, the source code for the module being tested is loaded, * and parsed to extract the JavaDoc descriptions for the static key * fields. The results are stored in the appropriate English locale file * in the resource directory. * * <p>Note the parsing is naive; at minimum, you should load the resulting * locale file and remove any HTML markup. */ public ModuleTestBase() { getSourceCodeDir(); getResourceDir(); } /** * Returns the location of the source code directory for your project. * This defaults to "src/main/java", which is the standard for projects * built with maven2. If you use a different source code directory, * you should override this method. * * <p>If you want to disable automatic key description generation, * return null from this method. * * @return the source code directory for the project */ protected File getSourceCodeDir() { return getProjectDir("src/main/java"); } /** * Returns the location of the Java resources directory for your project. * This defaults to "src/resources/java", which is the standard for projects * built with maven2. If you use a different source code directory -- * for instance, if your resources directory is the same as your source * code directory -- you should override this method. * * <p>If you want to disable automatic key description generation, * return null from this method. * * @return the source code directory for the project */ protected File getResourceDir() { return getProjectDir("src/main/resources"); } /** * Returns a project directory for a Heritrix subproject. This is here * so that the src and resources directories can be found whether the * unit test is run using maven2 or using Eclipse. The two build systems * use different working directories. * * @param path the path the path to find * @return the found path */ private File getProjectDir(String path) { File r = new File(path); if (r.exists()) { return r; } String cname = getClass().getName(); if (cname.startsWith("org.archive.processors")) { return new File("modules/" + path); } if (cname.startsWith("org.archive.deciderules")) { return new File("modules/" + path); } if (cname.startsWith("org.archive.crawler")) { return new File("engine/" + path); } return null; } /** * Returns the class of the module to test. Deduces from * test class name if possible. * * @return the class of the module to test */ protected Class<?> getModuleClass() { String myClassName = this.getClass().getCanonicalName(); if (!myClassName.endsWith("Test")) { throw new UnsupportedOperationException("Cannot get module class of " + myClassName); } String moduleClassName = myClassName.substring(0, myClassName.length() - 4); try { return Class.forName(moduleClassName); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } } /** * Return an example instance of the module. This is used by * testSerialization to ensure the module can be serialized. * * @return an example instance of the module * @throws Exception if the module cannot be constructed for any reason */ protected Object makeModule() throws Exception { return getModuleClass().newInstance(); } /** * Tests that the module can be serialized. The value returned by * {@link #makeModule} is serialized to a byte array, and then * deserialized, and then serialized to a second byte array. The results * are passed to {@link #verifySerialization}, which will simply compare * the two byte arrays for equality. (That won't always work; see * that method for details). * * <p>If nothing else, this test is useful for catching NotSerializable * exceptions for your module or classes it depends on. * * @throws Exception if the module cannot be serialized */ public void testSerializationIfAppropriate() throws Exception { Object first = makeModule(); if (!(first instanceof Serializable)) { return; } byte[] firstBytes = SerializationUtils.serialize((Serializable) first); Object second = SerializationUtils.deserialize(firstBytes); byte[] secondBytes = SerializationUtils.serialize((Serializable) second); Object third = SerializationUtils.deserialize(secondBytes); byte[] thirdBytes = SerializationUtils.serialize((Serializable) third); // HashMap serialization reverses order of items in linked buckets // each roundtrip -- so don't check one roundtrip, check two. // // NOTE This is JVM-dependent behaviour, and since <= 1.7.0_u51 this // ordering of serialisation cannot be relied upon. However, a TreeMap // can be used instead of a HashMap, and this appears to have // predictable serialisation behaviour. // // @see // http://stackoverflow.com/questions/22392258/serialization-round-trip-of-hash-map-does-not-preserve-order // // verifySerialization(first, firstBytes, second, secondBytes); verifySerialization(first, firstBytes, third, thirdBytes); } /** * Verifies that serialization was successful. * * <p>By default, this method simply compares the first and second byte * arrays for equality. That may not work if you use custom serialization * -- for instance, if you're serializing a timestamp. If that's the case * you should override this method to compare the given objects, or to * simply do nothing. (If this method does nothing, then the * {@link #testSerialization} test is still useful for catching * NotSerializable problems). * * @param first the first object that was serialized * @param firstBytes the byte array the first object was serialized to * @param second the second object that was serialized * @param secondBytes the byte array the second object was serialized to * @throws Exception if anyt problem occurs */ protected void verifySerialization(Object first, byte[] firstBytes, Object second, byte[] secondBytes) throws Exception { assertTrue(Arrays.equals(firstBytes, secondBytes)); } @Override protected void runTest() throws Throwable { try { super.runTest(); } catch (Throwable t) { t.printStackTrace(); throw t; } } protected Recorder getRecorder() throws IOException { if (Recorder.getHttpRecorder() == null) { Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(), getClass().getName(), 16 * 1024, 512 * 1024); Recorder.setHttpRecorder(httpRecorder); } return Recorder.getHttpRecorder(); } protected CrawlURI makeCrawlURI(String uri) throws URIException, IOException { UURI uuri = UURIFactory.getInstance(uri); CrawlURI curi = new CrawlURI(uuri); curi.setSeed(true); curi.setRecorder(getRecorder()); return curi; } }