knowledgeMiner.InformationDripBootstrapping.java Source code

Java tutorial

Introduction

Here is the source code for knowledgeMiner.InformationDripBootstrapping.java

Source

/*******************************************************************************
 * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl.html
 * 
 * Contributors:
 *    Sam Sarjant - initial API and implementation
 ******************************************************************************/
package knowledgeMiner;

import io.IOManager;
import io.ResourceAccess;
import io.ontology.OntologySocket;
import io.resources.WMISocket;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import knowledgeMiner.preprocessing.KnowledgeMinerPreprocessor;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;

import cyc.OntologyConcept;

/**
 * A class for performing ripple ("information drip") bootstrapping
 * experiments. An experiment starts from a seed concept or article, processes
 * it, then processes the concepts/articles returned by that processing, and so
 * on outwards in layers ("ripples"). The whole rippling process can be repeated
 * several times ("drips").
 * 
 * The experiment is configured by four values: the starting point, the maximum
 * ripple distance, the number of repeats and the initial run number.
 * 
 * @author Sam Sarjant
 */
public class InformationDripBootstrapping {
    /**
     * The completion service the ripple tasks are submitted to. Created in
     * {@link #run()}.
     */
    private CompletionService<Collection<ConceptModule>> pool_;

    /** The initial seed concept. */
    private ConceptModule initial_;

    /** The maximum number of ripples (-1, the default, means exhaustive). */
    private int maxRipple_ = -1;

    /** The ontology access. */
    private OntologySocket ontology_;

    /** The number of repeats (bootstrapping passes). Defaults to 1. */
    private int repeats_ = 1;

    /** WMI access. */
    private WMISocket wmi_;

    /** The starting run number for bootstrapping purposes. Defaults to 0. */
    private int initialRunNumber_;

    /**
     * Constructor for a new InformationDripBootstrapping
     * 
     * @param concept
     *            The concept to start with. A value starting with '#' is
     *            looked up as an ontology concept (without the '#'); anything
     *            else is looked up as an article title. Must not be null.
     * @param ripples
     *            The distance of 'ripples' to update (optional, null means
     *            unlimited).
     * @param repeats
     *            The number of 'drips' to perform (optional, null means 1).
     * @param initialRunNumber
     *            The starting run number for bootstrapping purposes (optional,
     *            null means 0).
     * @throws IllegalArgumentException
     *             If the concept is null, or the named concept/article does
     *             not exist.
     * @throws NumberFormatException
     *             If a non-null numeric argument cannot be parsed as an int.
     * @throws Exception
     *             If any of the underlying resource calls fail.
     */
    public InformationDripBootstrapping(String concept, String ripples, String repeats, String initialRunNumber)
            throws Exception {
        // Validate before acquiring any resources, so a missing argument gives
        // a clear error instead of a NullPointerException further down.
        if (concept == null)
            throw new IllegalArgumentException("No starting concept given (expected \"article\" or #concept).");

        wmi_ = ResourceAccess.requestWMISocket();
        ontology_ = ResourceAccess.requestOntologySocket();
        KnowledgeMinerPreprocessor.getInstance();
        IOManager.newInstance();

        initial_ = null;
        if (concept.startsWith("#")) {
            // '#name' denotes an ontology concept
            String conceptName = concept.substring(1);
            if (!ontology_.inOntology(conceptName))
                throw new IllegalArgumentException("Concept does not exist!");
            initial_ = new ConceptModule(new OntologyConcept(conceptName));
        } else {
            // Anything else is treated as an article title
            int articleID = wmi_.getArticleByTitle(concept);
            if (articleID == -1)
                throw new IllegalArgumentException("Article does not exist!");
            initial_ = new ConceptModule(articleID);
        }

        // Optional numeric settings fall back to their defaults when absent
        maxRipple_ = -1;
        if (ripples != null)
            maxRipple_ = Integer.parseInt(ripples);
        repeats_ = 1;
        if (repeats != null)
            repeats_ = Integer.parseInt(repeats);
        initialRunNumber_ = 0;
        if (initialRunNumber != null)
            initialRunNumber_ = Integer.parseInt(initialRunNumber);
    }

    /**
     * Run the experiment by starting with a seed concept/article and rippling
     * outwards to other linked concepts/articles. When max ripple is reached
     * (or a layer produces no further work), repeat for as many repeats as
     * defined.
     * 
     * The worker pool created here is always shut down before this method
     * returns. If the calling thread is interrupted while waiting for results,
     * the interrupt status is restored and the run is abandoned.
     */
    private void run() {
        ResourceAccess.newInstance();
        IOManager.newInstance();
        KnowledgeMiner.readInOntologyMappings(initialRunNumber_);
        ExecutorService executor = Executors.newFixedThreadPool(KnowledgeMiner.getNumThreads());
        pool_ = new ExecutorCompletionService<Collection<ConceptModule>>(executor);
        try {
            for (int i = 0; i < repeats_; i++) {
                KnowledgeMiner.runID_ = initialRunNumber_ + i;

                // Set up completed collections (concurrent, as they are handed
                // to every task running in the pool)
                Set<OntologyConcept> completedConcepts = Collections
                        .newSetFromMap(new ConcurrentHashMap<OntologyConcept, Boolean>());
                Set<Integer> completedArticles = Collections
                        .newSetFromMap(new ConcurrentHashMap<Integer, Boolean>());

                // Add the initial
                Collection<ConceptModule> rippleLayer = new HashSet<>();
                rippleLayer.add(initial_);

                int maxRipples = (maxRipple_ == -1) ? Integer.MAX_VALUE : maxRipple_;
                for (int r = 0; r <= maxRipples; r++) {
                    System.out.println("\nRipple " + r + ": " + rippleLayer.size() + " tasks to process.\n");
                    int count = 0;

                    // Simultaneously process every concept in the ripple layer
                    System.out.print(count++ + ": ");
                    boolean isFinalRipple = (r == maxRipples);
                    for (ConceptModule cm : rippleLayer) {
                        // NOTE(review): the boolean presumably tells the task
                        // whether to gather links for a further layer - confirm
                        // against RippleTask.
                        pool_.submit(new RippleTask(cm, !isFinalRipple, completedArticles, completedConcepts));
                    }

                    // Wait for the tasks to finish and store results
                    Collection<ConceptModule> nextLayer = new HashSet<>();
                    for (int j = 0; j < rippleLayer.size(); j++) {
                        try {
                            // Get the results and process them.
                            Collection<ConceptModule> result = pool_.take().get();
                            if (count <= rippleLayer.size())
                                System.out.print(count++ + ": ");
                            // The final ripple never feeds a further layer
                            if (isFinalRipple)
                                continue;

                            // Add the articles/concepts to the next ripple layer
                            for (ConceptModule cm : result) {
                                if (cm.getConcept() != null && !completedConcepts.contains(cm.getConcept()))
                                    nextLayer.add(cm);
                                else if (cm.getArticle() != -1 && !completedArticles.contains(cm.getArticle()))
                                    nextLayer.add(cm);
                            }
                        } catch (InterruptedException e) {
                            // Carrying on would lose the interrupt and leave an
                            // untaken result in the queue for the next layer to
                            // consume, so restore the flag and stop.
                            e.printStackTrace();
                            Thread.currentThread().interrupt();
                            return;
                        } catch (ExecutionException e) {
                            // A single failed task should not abort the layer
                            e.printStackTrace();
                        }
                    }
                    rippleLayer = nextLayer;

                    // TODO Record details of this run

                    // Clear preprocessed data
                    KnowledgeMinerPreprocessor.getInstance().writeHeuristics();

                    if (rippleLayer.isEmpty())
                        break;
                }
            }
        } finally {
            // Fixed-pool worker threads live until the pool is shut down;
            // without this they can keep the JVM alive after the run.
            executor.shutdown();
        }
    }

    /**
     * Command-line entry point. Parses the options, builds the experiment and
     * runs it. Any exception is printed and the process exits with status 1.
     * 
     * Options: -c starting concept ("article" or #concept), -r number of
     * ripples, -N number of repeats, -i initial run number.
     * 
     * @param args
     *            The command-line arguments.
     */
    public static void main(String[] args) {
        Options options = new Options();
        options.addOption("r", true, "The number of ripples (-1 for unlimited).");
        options.addOption("c", true, "The concept to begin with (\"article\" or #concept).");
        // The value of -N is passed to the constructor as the repeat count, so
        // describe it as such.
        options.addOption("N", true, "The number of repeats (bootstrapping passes).");
        options.addOption("i", true, "Initial run number.");

        CommandLineParser parser = new BasicParser();
        try {
            CommandLine parse = parser.parse(options, args);
            InformationDripBootstrapping rb = new InformationDripBootstrapping(parse.getOptionValue("c"),
                    parse.getOptionValue("r"), parse.getOptionValue("N"), parse.getOptionValue("i"));
            rb.run();
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}