org.archive.crawler.frontier.BdbMultipleWorkQueuesTest.java Source code

Java tutorial

Introduction

Here is the source code for BdbMultipleWorkQueuesTest.java (package org.archive.crawler.frontier), a unit test from the Heritrix web crawler:

Source

/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.crawler.frontier;

import java.util.logging.Logger;

import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.modules.SchedulingConstants;
import org.archive.net.UURIFactory;

import com.sleepycat.je.tree.Key;

import junit.framework.TestCase;

/**
 * Unit tests for BdbMultipleWorkQueues functionality; currently covers the
 * ordering of the keys produced by calculateInsertKey().
 * 
 * @author gojomo
 */
public class BdbMultipleWorkQueuesTest extends TestCase {
    private static final Logger logger = Logger.getLogger(BdbMultipleWorkQueuesTest.class.getName());

    /** Class key assigned to every CrawlURI built by this test. */
    private static final String CLASS_KEY = "foo";

    /**
     * Basic sanity checks for calculateInsertKey() -- ensure ordinal,
     * precedence ("cost") and schedulingDirective have the intended effects
     * on key ordering. The checks are repeated with the ordinal origin set to
     * each power of two below Long.MAX_VALUE / 4 (about 2^61).
     * 
     * @throws URIException if one of the test URIs cannot be built
     */
    public void testCalculateInsertKey() throws URIException {
        // Thread.interrupted() clears the interrupt flag as it reads it, so
        // this drains any interrupt still pending on the test thread before
        // the checks below run.
        while (Thread.interrupted()) {
            logger.warning("stray interrupt cleared");
        }

        for (long ordinalOrigin = 1; ordinalOrigin < Long.MAX_VALUE / 4; ordinalOrigin <<= 1) {
            // key1/key2: no directive or precedence set, consecutive ordinals
            CrawlURI curi1 = newCuri("http://archive.org/foo", ordinalOrigin);
            byte[] key1 = BdbMultipleWorkQueues.calculateInsertKey(curi1).getData();

            CrawlURI curi2 = newCuri("http://archive.org/bar", ordinalOrigin + 1);
            byte[] key2 = BdbMultipleWorkQueues.calculateInsertKey(curi2).getData();

            // key3: higher ordinal than key2, but HIGH scheduling directive
            CrawlURI curi3 = newCuri("http://archive.org/baz", ordinalOrigin + 2);
            curi3.setSchedulingDirective(SchedulingConstants.HIGH);
            byte[] key3 = BdbMultipleWorkQueues.calculateInsertKey(curi3).getData();

            // key4/key5: key4 has the lower ordinal, key5 the lower precedence
            CrawlURI curi4 = newCuri("http://archive.org/zle", ordinalOrigin + 3);
            curi4.setPrecedence(2);
            byte[] key4 = BdbMultipleWorkQueues.calculateInsertKey(curi4).getData();

            CrawlURI curi5 = newCuri("http://archive.org/gru", ordinalOrigin + 4);
            curi5.setPrecedence(1);
            byte[] key5 = BdbMultipleWorkQueues.calculateInsertKey(curi5).getData();

            // ensure that key1 (with lower ordinal) sorts before key2 (higher
            // ordinal)
            assertTrue("lower ordinal sorting first (" + ordinalOrigin + ")",
                    Key.compareKeys(key1, key2, null) < 0);
            // ensure that key3 (with HIGH scheduling) sorts before key2, even
            // though key2 has the lower ordinal
            assertTrue("lower directive sorting first (" + ordinalOrigin + ")",
                    Key.compareKeys(key3, key2, null) < 0);
            // ensure that key5 (with lower cost) sorts before key4, even though
            // key4 has the lower ordinal and the same default NORMAL
            // scheduling directive
            assertTrue("lower cost sorting first (" + ordinalOrigin + ")",
                    Key.compareKeys(key5, key4, null) < 0);
        }
    }

    /**
     * Builds a CrawlURI for the given URI string with the given ordinal and
     * the shared test class key; callers apply any further settings.
     * 
     * @param uri URI string handed to UURIFactory
     * @param ordinal ordinal to assign to the new CrawlURI
     * @return the newly created CrawlURI
     * @throws URIException if the URI cannot be built
     */
    private static CrawlURI newCuri(String uri, long ordinal) throws URIException {
        CrawlURI curi = new CrawlURI(UURIFactory.getInstance(uri));
        curi.setOrdinal(ordinal);
        curi.setClassKey(CLASS_KEY);
        return curi;
    }
}