org.dragoneronca.nlp.wol.graph_building.WolGraphBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.dragoneronca.nlp.wol.graph_building.WolGraphBuilder.java

Source

/*
 * Copyright Paolo Dragone 2014.
 * Copyright Alessandro Ronca 2014.
 *
 * This file is part of Wiktionary Ontology.
 *
 * Wiktionary Ontology is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Wiktionary Ontology is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Wiktionary Ontology. If not, see <http://www.gnu.org/licenses/>.
 */

package org.dragoneronca.nlp.wol.graph_building;

import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.log4j.Logger;
import org.dragoneronca.nlp.wol.WolConfiguration;
import org.dragoneronca.nlp.wol.domain.RangedSenseIteratorFactory;
import org.dragoneronca.nlp.wol.domain.RangedSenseScanner;
import org.dragoneronca.nlp.wol.domain.WolDomainContext;
import org.dragoneronca.nlp.wol.domain.entities.SenseSet;

import javax.persistence.EntityManager;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;

/**
 * This task iterates over all the sense sets, POS-tagging content texts (glosses and sense
 * references) and creating semantic edges with an initial disambiguation probability based on
 * syntactic and semantic contexts.
 *
 * @author Paolo Dragone
 * @author Alessandro Ronca
 */
public class WolGraphBuilder implements Runnable {

    private static final Logger LOG = Logger.getLogger(WolGraphBuilder.class);

    private static final int DEFAULT_SENSESETS_BUNCH_SIZE = 50000;
    private static final int DEFAULT_SENSES_BUNCH_SIZE = 100000;
    private static final int DEFAULT_DELTA_TIME = 10 * 1000;
    private static final int DEFAULT_COMMIT_RATE = 100;

    /** Maximum number of sense sets loaded and processed per bunch. */
    private static final int SENSESETS_BUNCH_SIZE;
    /** Size passed to each {@link RangedSenseScanner}. */
    private static final int SENSES_BUNCH_SIZE;
    /** Minimum time, in milliseconds, between two progress log messages. */
    private static final int DELTA_TIME;
    /** Number of processed sense sets after which the current transaction is committed. */
    private static final int COMMIT_RATE;

    static {
        PropertiesConfiguration properties = WolConfiguration.getInstance().getConfiguration("environment");

        SENSESETS_BUNCH_SIZE = readPositiveInt(properties, "wol_graph_builder.sensesets_bunch_size",
                DEFAULT_SENSESETS_BUNCH_SIZE);
        SENSES_BUNCH_SIZE = readPositiveInt(properties, "wol_graph_builder.senses_bunch_size",
                DEFAULT_SENSES_BUNCH_SIZE);
        DELTA_TIME = properties.getInt("wol_graph_builder.delta_time", DEFAULT_DELTA_TIME);
        COMMIT_RATE = readPositiveInt(properties, "wol_graph_builder.commit_rate", DEFAULT_COMMIT_RATE);
    }

    private final GraphType graphType;

    /** Set on the first call to {@link #run()}; later calls return immediately. */
    private boolean executed = false;

    /**
     * Default graph builder: builds a {@link GraphType#SIMILARITY_BASED} graph.
     */
    public WolGraphBuilder() {
        this(GraphType.SIMILARITY_BASED);
    }

    /**
     * It constructs a graph builder according to a specified computation.
     *
     * @param graphType the graph type you want to build; must not be {@code null}.
     * @throws NullPointerException if {@code graphType} is {@code null}.
     */
    public WolGraphBuilder(GraphType graphType) {
        if (graphType == null) {
            // Fail here with a clear message instead of with an anonymous NPE inside run().
            throw new NullPointerException("graphType must not be null");
        }
        this.graphType = graphType;
    }

    /**
     * The main function to start the graph building algorithm.
     *
     * @param args no parameters.
     */
    public static void main(String[] args) {
        WolConfiguration.getInstance();
        WolGraphBuilder graphBuilder = new WolGraphBuilder();
        graphBuilder.run();
    }

    /**
     * Reads an integer property that must be strictly positive.
     *
     * @param properties   the configuration to read from.
     * @param key          the property key.
     * @param defaultValue the value used when the key is missing or its value is not positive.
     * @return the configured value if it is greater than zero, {@code defaultValue} otherwise.
     */
    private static int readPositiveInt(PropertiesConfiguration properties, String key, int defaultValue) {
        int value = properties.getInt(key, defaultValue);
        if (value <= 0) {
            // A zero commit rate would cause a division by zero and a non-positive bunch size
            // would produce empty bunches, so fall back to the default.
            LOG.warn("Invalid value " + value + " for '" + key + "', using default " + defaultValue);
            return defaultValue;
        }
        return value;
    }

    /**
     * Processes every sense set, one bunch at a time. For each bunch a new
     * {@link RangedSenseScanner} is walked and the whole bunch is processed once per range it
     * yields. Only the first invocation does any work.
     */
    @Override
    public void run() {
        if (executed) {
            return;
        }
        executed = true;
        LOG.info("Start");

        WolDomainContext domainContext = WolDomainContext.getInstance();
        EntityManager entityManager = domainContext.getEntityManager();

        Iterator<SenseSet> senseSetIterator = domainContext.senseSetIterator(SENSESETS_BUNCH_SIZE);
        Set<SenseSet> senseSetsBunch = getBunchOfSenseSets(senseSetIterator, SENSESETS_BUNCH_SIZE);
        while (!senseSetsBunch.isEmpty()) {

            RangedSenseScanner rangedSenseScanner = new RangedSenseScanner(SENSES_BUNCH_SIZE);
            while (rangedSenseScanner.hasNext()) {
                processRange(entityManager, rangedSenseScanner.next(), senseSetsBunch);
                Runtime.getRuntime().gc();
            }
            entityManager.clear();
            Runtime.getRuntime().gc();

            senseSetsBunch = getBunchOfSenseSets(senseSetIterator, SENSESETS_BUNCH_SIZE);
        }
    }

    /**
     * Processes a bunch of sense sets against a single range, committing the transaction every
     * {@code COMMIT_RATE} sense sets and once more at the end. If anything fails, the transaction
     * that is still active is rolled back before the exception is propagated.
     *
     * @param entityManager              the entity manager whose transaction is used.
     * @param rangedSenseIteratorFactory the factory handed to the processor together with its range.
     * @param senseSetsBunch             the sense sets to process.
     */
    private void processRange(EntityManager entityManager,
                              RangedSenseIteratorFactory rangedSenseIteratorFactory,
                              Set<SenseSet> senseSetsBunch) {
        entityManager.getTransaction().begin();
        try {
            SenseSetProcessor processor = createProcessor(rangedSenseIteratorFactory);

            int processed = 0;
            long startTime = System.currentTimeMillis();
            for (SenseSet senseSet : senseSetsBunch) {
                processor.processSenseSet(senseSet);
                processed++;

                // Progress is logged at most once every DELTA_TIME milliseconds.
                long endTime = System.currentTimeMillis();
                if (endTime - startTime > DELTA_TIME) {
                    LOG.info("SenseSet processed: " + processed);
                    startTime = endTime;
                }

                if (processed % COMMIT_RATE == 0) {
                    entityManager.getTransaction().commit();
                    entityManager.getTransaction().begin();
                }
            }
            entityManager.getTransaction().commit();
        } catch (RuntimeException e) {
            rollbackIfActive(entityManager);
            throw e;
        }
    }

    /**
     * Creates the processor matching the configured graph type.
     *
     * @param rangedSenseIteratorFactory the factory (and source of the range) given to the processor.
     * @return a new processor, never {@code null}.
     * @throws IllegalStateException if the graph type has no associated processor.
     */
    private SenseSetProcessor createProcessor(RangedSenseIteratorFactory rangedSenseIteratorFactory) {
        RangedSenseScanner.AlphabeticRange alphabeticRange = rangedSenseIteratorFactory.getRange();
        switch (graphType) {
            case SIMILARITY_BASED:
                return new SimilarityBasedProcessor(rangedSenseIteratorFactory, alphabeticRange);
            default:
                throw new IllegalStateException("Unsupported graph type: " + graphType);
        }
    }

    /**
     * Rolls back the current transaction if it is still active. A failure of the rollback itself
     * is only logged, so that it does not hide the exception that triggered it.
     *
     * @param entityManager the entity manager whose transaction is rolled back.
     */
    private void rollbackIfActive(EntityManager entityManager) {
        try {
            if (entityManager.getTransaction().isActive()) {
                entityManager.getTransaction().rollback();
            }
        } catch (RuntimeException rollbackFailure) {
            LOG.error("Unable to roll back the transaction", rollbackFailure);
        }
    }

    /**
     * Collects up to {@code max} distinct sense sets from the iterator, preserving iteration order.
     *
     * @param iterator the source of sense sets; it is advanced by this call.
     * @param max      the maximum number of sense sets to collect.
     * @return the collected sense sets; empty when the iterator is exhausted.
     */
    private Set<SenseSet> getBunchOfSenseSets(Iterator<SenseSet> iterator, int max) {
        Set<SenseSet> senseSets = new LinkedHashSet<>();
        // Strictly less than: "<=" would collect max + 1 elements.
        while (senseSets.size() < max && iterator.hasNext()) {
            senseSets.add(iterator.next());
        }
        return senseSets;
    }

    /**
     * A set of possible computations.
     */
    public enum GraphType {
        SIMILARITY_BASED
    }

}