io.scigraph.owlapi.loader.BatchOwlLoader.java Source code

Java tutorial

Introduction

Here is the source code for io.scigraph.owlapi.loader.BatchOwlLoader.java

Source

/**
 * Copyright (C) 2014 The SciGraph authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.scigraph.owlapi.loader;

import static com.google.common.collect.Iterables.size;
import static java.lang.String.format;
import io.scigraph.neo4j.Graph;
import io.scigraph.neo4j.Neo4jModule;
import io.scigraph.owlapi.OwlApiUtils;
import io.scigraph.owlapi.OwlPostprocessor;
import io.scigraph.owlapi.loader.OwlLoadConfiguration.MappedProperty;
import io.scigraph.owlapi.loader.OwlLoadConfiguration.OntologySetup;
import io.scigraph.owlapi.loader.bindings.IndicatesCliqueConfiguration;
import io.scigraph.owlapi.loader.bindings.IndicatesMappedProperties;
import io.scigraph.owlapi.loader.bindings.IndicatesNumberOfConsumerThreads;
import io.scigraph.owlapi.loader.bindings.IndicatesNumberOfProducerThreads;
import io.scigraph.owlapi.postprocessors.CliqueConfiguration;
import io.scigraph.owlapi.postprocessors.Clique;

import java.io.File;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.inject.Inject;
import javax.inject.Provider;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.mapdb.DB;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Transaction;
import org.neo4j.tooling.GlobalGraphOperations;

import com.google.common.base.Optional;
import com.google.common.base.Stopwatch;
import com.google.inject.Guice;
import com.google.inject.Injector;

public class BatchOwlLoader {

    private static final Logger logger = Logger.getLogger(BatchOwlLoader.class.getName());

    static final OntologySetup POISON_STR = new OntologySetup();

    @Inject
    @IndicatesNumberOfConsumerThreads
    int numConsumers;

    @Inject
    @IndicatesNumberOfProducerThreads
    int numProducers;

    @Inject
    PostpostprocessorProvider postprocessorProvider;

    @Inject
    Graph graph;

    @Inject
    List<OntologySetup> ontologies;

    @Inject
    @IndicatesMappedProperties
    List<MappedProperty> mappedProperties;

    @Inject
    Provider<OwlOntologyConsumer> consumerProvider;

    @Inject
    Provider<OwlOntologyProducer> producerProvider;

    @Inject
    BlockingQueue<OWLCompositeObject> queue;

    @Inject
    BlockingQueue<OntologySetup> urlQueue;

    @Inject
    ExecutorService exec;

    @Inject
    @IndicatesCliqueConfiguration
    Optional<CliqueConfiguration> cliqueConfiguration;

    static {
        System.setProperty("entityExpansionLimit", Integer.toString(1_000_000));
        OwlApiUtils.silenceOboParser();
    }

    public void loadOntology() throws InterruptedException, ExecutionException {
        CompletionService<Long> completionService = new ExecutorCompletionService<Long>(exec);
        Set<Future<?>> futures = new HashSet<>();
        if (!ontologies.isEmpty()) {
            for (int i = 0; i < numConsumers; i++) {
                futures.add(completionService.submit(consumerProvider.get()));
            }
            for (int i = 0; i < numProducers; i++) {
                futures.add(completionService.submit(producerProvider.get()));
            }
            for (OntologySetup ontology : ontologies) {
                urlQueue.offer(ontology);
            }
            for (int i = 0; i < numProducers; i++) {
                urlQueue.offer(POISON_STR);
            }
        }

        while (futures.size() > 0) {
            Future<?> completedFuture = completionService.take();
            futures.remove(completedFuture);
            try {
                completedFuture.get();
            } catch (ExecutionException e) {
                logger.log(Level.SEVERE, "Stopping batchLoading due to: " + e.getMessage(), e);
                e.printStackTrace();
                exec.shutdownNow();
                throw new InterruptedException(e.getCause().getMessage());
            }
        }

        exec.shutdown();
        exec.awaitTermination(10, TimeUnit.DAYS);
        graph.shutdown();
        logger.info("Postprocessing...");
        postprocessorProvider.get().postprocess();

        if (cliqueConfiguration.isPresent()) {
            postprocessorProvider.runCliquePostprocessor(cliqueConfiguration.get());
        }

        postprocessorProvider.shutdown();

    }

    static class PostpostprocessorProvider implements Provider<OwlPostprocessor> {

        @Inject
        OwlLoadConfiguration config;

        @Inject
        Provider<GraphDatabaseService> graphDbProvider;

        GraphDatabaseService graphDb;

        @Override
        public OwlPostprocessor get() {
            graphDb = graphDbProvider.get();
            return new OwlPostprocessor(graphDb, config.getCategories());
        }

        public void runCliquePostprocessor(CliqueConfiguration cliqueConfiguration) {
            Clique clique = new Clique(graphDb, cliqueConfiguration);
            clique.run();
        }

        public void shutdown() {
            try (Transaction tx = graphDb.beginTx()) {
                logger.info(size(GlobalGraphOperations.at(graphDb).getAllNodes()) + " nodes");
                logger.info(size(GlobalGraphOperations.at(graphDb).getAllRelationships()) + " relationships");
                tx.success();
            }
            graphDb.shutdown();
        }

    }

    public static void load(OwlLoadConfiguration config) throws InterruptedException, ExecutionException {
        Injector i = Guice.createInjector(new OwlLoaderModule(config),
                new Neo4jModule(config.getGraphConfiguration()));
        BatchOwlLoader loader = i.getInstance(BatchOwlLoader.class);
        logger.info("Loading ontologies...");
        Stopwatch timer = Stopwatch.createStarted();
        // TODO catch exception and delete the incomplete graph through the graph location
        loader.loadOntology();
        DB mapDb = i.getInstance(DB.class);
        mapDb.close();
        logger.info(format("Loading took %d minutes", timer.elapsed(TimeUnit.MINUTES)));
    }

    protected static Options getOptions() {
        Option configPath = new Option("c", "configpath", true, "The location of the configuration file");
        configPath.setRequired(true);
        Options options = new Options();
        options.addOption(configPath);
        return options;
    }

    public static void main(String[] args) throws Exception {
        CommandLineParser parser = new DefaultParser();
        CommandLine cmd = null;
        try {
            cmd = parser.parse(getOptions(), args);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(BatchOwlLoader.class.getSimpleName(), getOptions());
            System.exit(-1);
        }

        OwlLoadConfigurationLoader owlLoadConfigurationLoader = new OwlLoadConfigurationLoader(
                new File(cmd.getOptionValue('c').trim()));
        OwlLoadConfiguration config = owlLoadConfigurationLoader.loadConfig();
        load(config);
        // TODO: Is Guice causing this to hang? #44
        System.exit(0);
    }

}