org.apache.nutch.storage.TestGoraStorage.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nutch.storage.TestGoraStorage.java

Source

/*******************************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.apache.nutch.storage;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.avro.util.Utf8;
import org.apache.commons.io.IOUtils;
import org.apache.gora.query.Result;
import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.AbstractNutchTest;
import org.apache.nutch.util.CrawlTestUtil;
import org.hsqldb.Server;
import org.junit.Ignore;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Tests basic Gora functionality by writing and reading webpages, from a
 * single thread, from several threads sharing one store, and from several
 * separately spawned JVM processes.
 */
public class TestGoraStorage extends AbstractNutchTest {

    @Override
    @Before
    public void setUp() throws Exception {
        super.setUp();
    }

    @Override
    @After
    public void tearDown() throws Exception {
        super.tearDown();
    }

    /**
     * Sequentially read and write pages to a store.
     * 
     * @throws Exception if the store access or an assertion fails
     */
    @Test
    public void testSinglethreaded() throws Exception {
        String id = "singlethread";
        readWrite(id, webPageStore);
    }

    /**
     * Writes 1000 pages keyed {@code key-<id>-<i>} to the store, reading each
     * one back right after it is flushed, then scans the whole store and
     * checks that exactly 1000 keys containing {@code id} are present.
     * 
     * @param id    marker embedded in every key written by this call; it is
     *              also what the final scan matches on
     * @param store the store to write to and read from
     * @throws IOException if the scan result cannot be closed
     * @throws Exception   if the store access or an assertion fails
     */
    private static void readWrite(String id, DataStore<String, WebPage> store) throws IOException, Exception {
        WebPage page = new WebPage();
        int max = 1000;
        for (int i = 0; i < max; i++) {
            // store a page with title
            String key = "key-" + id + "-" + i;
            String title = "title" + i;
            page.setTitle(new Utf8(title));
            store.put(key, page);
            store.flush();

            // retrieve page and check title; the retrieved instance is the one
            // that gets re-titled and stored under the next key
            page = store.get(key);
            assertNotNull(page);
            assertEquals(title, page.getTitle().toString());
        }

        // scan over the rows
        Result<String, WebPage> result = store.execute(store.newQuery());
        int count = 0;
        try {
            while (result.next()) {
                try {
                    // only count keys in the store for the current id
                    if (result.getKey().contains(id))
                        count++;
                } catch (Exception e) {
                    // best effort: a row that cannot be inspected is not counted,
                    // so the failure still surfaces through the count check below
                    e.printStackTrace();
                }
            }
        } finally {
            // release whatever the scan holds, even if next() threw
            result.close();
        }
        // check amount
        assertEquals(max, count);
    }

    /**
     * Runs all tasks on a fresh fixed-size thread pool and asserts that each
     * of them returned {@code 0}. The pool is always shut down afterwards so
     * that its worker threads do not outlive the test.
     * 
     * @param numThreads size of the thread pool
     * @param tasks      the tasks to run; each must return 0 on success
     * @throws Exception if waiting for the tasks fails or a task did not
     *                   return 0
     */
    private static void runAndAssertSuccess(int numThreads, Collection<Callable<Integer>> tasks)
            throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(numThreads);
        try {
            // submit them at once; invokeAll returns when all tasks are done
            List<Future<Integer>> results = pool.invokeAll(tasks);

            // check results
            for (Future<Integer> result : results) {
                assertEquals(0, (int) result.get());
            }
        } finally {
            pool.shutdownNow();
        }
    }

    /**
     * Starts a new JVM running the {@code main} method of the given class,
     * prints everything the child wrote to stdout/stderr, and returns the
     * child's exit code.
     * 
     * @param className fully qualified name of the class to launch
     * @return the exit code of the child process
     * @throws IOException          if the process cannot be started or its
     *                              output cannot be read
     * @throws InterruptedException if the calling thread is interrupted while
     *                              waiting for the child to exit
     */
    private static int runChildProcess(String className) throws IOException, InterruptedException {
        String separator = System.getProperty("file.separator");
        String pathSeparator = System.getProperty("path.separator");
        // ./src/testprocess goes first on the child's classpath
        // NOTE(review): presumably it holds the configuration that connects the
        // child to the local sql service - confirm
        String classpath = "./src/testprocess" + pathSeparator + System.getProperty("java.class.path");
        String path = System.getProperty("java.home") + separator + "bin" + separator + "java";
        ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp", classpath, className);
        processBuilder.redirectErrorStream(true);
        Process process = processBuilder.start();
        try {
            // Drain the output BEFORE waiting for the exit code: a child that
            // fills the pipe buffer blocks on write and would never exit if
            // nobody is reading, deadlocking waitFor().
            String output;
            InputStream in = process.getInputStream();
            try {
                output = IOUtils.toString(in);
            } finally {
                in.close();
            }
            int exit = process.waitFor();
            //print the output of the process
            System.out.println("===Process stream for " + Thread.currentThread() + "\n"
                    + output + "===End of process stream.");
            return exit;
        } finally {
            // no-op for a child that already exited; cleans up after a failure
            process.destroy();
        }
    }

    /**
     * Tests multiple thread reading and writing to the same store, this should be
     * no problem because {@link DataStore} implementations claim to be thread
     * safe.
     * 
     * @throws Exception if a task fails or the pool is interrupted
     */
    @Test
    @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
    public void testMultithreaded() throws Exception {
        int numThreads = 8;

        // define a list of tasks
        Collection<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>();
        for (int i = 0; i < numThreads; i++) {
            tasks.add(new Callable<Integer>() {
                @Override
                public Integer call() {
                    try {
                        // run a sequence, keyed by the worker thread's name
                        readWrite(Thread.currentThread().getName(), webPageStore);
                        // everything ok, return 0
                        return 0;
                    } catch (Exception e) {
                        e.printStackTrace();
                        // this will fail the test
                        return 1;
                    }
                }
            });
        }

        runAndAssertSuccess(numThreads, tasks);
    }

    /**
     * Tests multiple processes reading and writing to the same store backend, 
     * this is to simulate a multi process Nutch environment (i.e. MapReduce).
     * 
     * @throws Exception if a child process fails or the pool is interrupted
     */
    @Test
    public void testMultiProcess() throws Exception {
        // create and start a hsql server, a stand-alone (memory backed) db
        // (important: a stand-alone server should be used because simple
        //  file based access i.e. jdbc:hsqldb:file is NOT process-safe.)
        Server server = new Server();
        server.setDaemon(true);
        server.setSilent(true); // disables LOTS of trace
        final String className = getClass().getName();
        String dbName = "test";
        server.setDatabasePath(0, "mem:" + dbName);
        server.setDatabaseName(0, dbName);
        server.start();

        try {
            //create the store so that the tests can start right away
            StorageUtils.createWebStore(conf, String.class, WebPage.class);

            int numThreads = 4;

            // spawn multiple processes, each thread spawns own process
            Collection<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>();
            for (int i = 0; i < numThreads; i++) {
                tasks.add(new Callable<Integer>() {
                    @Override
                    public Integer call() {
                        try {
                            // process should exit with zero code
                            return runChildProcess(className);
                        } catch (Exception e) {
                            e.printStackTrace();
                            // this will fail the test
                            return 1;
                        }
                    }
                });
            }

            runAndAssertSuccess(numThreads, tasks);
        } finally {
            //stop db, also when a child process or an assertion failed
            server.stop();
        }
    }

    /**
     * Entry point for the child JVMs spawned by {@link #testMultiProcess()}:
     * creates a web store from a test configuration and runs one
     * read/write sequence against it.
     * 
     * @param args ignored
     * @throws Exception if the sequence fails, which makes the child exit
     *                   with a non-zero code
     */
    public static void main(String[] args) throws Exception {
        // entry point for the multiprocess test
        System.out.println("Starting!");

        Configuration localConf = CrawlTestUtil.createConfiguration();
        localConf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore");

        DataStore<String, WebPage> store = StorageUtils.createWebStore(localConf, String.class, WebPage.class);
        readWrite("single_id", store);
        System.out.println("Done.");
    }
}