org.dragoneronca.nlp.wol.disambiguation.Disambiguation.java Source code

Java tutorial

Introduction

Here is the source code for org.dragoneronca.nlp.wol.disambiguation.Disambiguation.java

Source

/*
 * Copyright Paolo Dragone 2014.
 * Copyright Alessandro Ronca 2014.
 *
 * This file is part of Wiktionary Ontology.
 *
 * Wiktionary Ontology is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Wiktionary Ontology is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Wiktionary Ontology. If not, see <http://www.gnu.org/licenses/>.
 */

package org.dragoneronca.nlp.wol.disambiguation;

import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.log4j.Logger;
import org.dragoneronca.nlp.wol.WolConfiguration;
import org.dragoneronca.nlp.wol.domain.WolDomainContext;
import org.dragoneronca.util.concurrent.Consumer;
import org.dragoneronca.util.concurrent.Producer;
import org.dragoneronca.util.graphs.PathScorer;
import org.dragoneronca.util.graphs.scorers.MarkovScorer;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;

/**
 * This class orchestrates the execution of the disambiguation algorithm, iterating over the senses
 * and delivering them to multiple <tt>SenseSolver</tt> objects (according to the parallelism
 * degree).
 * <p/>
 * Data are read from the database at the beginning, kept in main memory and processed there.
 * Only at the end is the database updated, in a single transaction.
 *
 * @author Paolo Dragone
 * @author Alessandro Ronca
 */
public class Disambiguation implements Runnable, Producer<LightSense> {

    /** Number of {@code SenseSolver} threads started for every iteration. */
    public static final int PARALLELISM_DEGREE;

    // The blank static finals below are assigned here, before their textual declaration;
    // this is legal because they only appear on the left-hand side of an assignment.
    static {
        PropertiesConfiguration properties = WolConfiguration.getInstance().getConfiguration("environment");

        DELTA_TIME = properties.getInt("disambiguation.delta_time");
        CONVERGENCE_RATIO = properties.getDouble("disambiguation.convergence_ratio");
        MAX_QUEUE_SIZE = properties.getInt("disambiguation.max_queue_size");
        PARALLELISM_DEGREE = properties.getInt("disambiguation.parallelism_degree");
    }

    private static final Logger LOG = Logger.getLogger(Disambiguation.class);

    /** Minimum elapsed time, in milliseconds, between two progress log messages. */
    private static final int DELTA_TIME;

    /** Iterations go on while converged / processed terms stays below this value. */
    private static final double CONVERGENCE_RATIO;

    /** Capacity of the queue handed to each registered consumer. */
    private static final int MAX_QUEUE_SIZE;

    private final PathScorer quasiCycleScorer;
    private final WolDomainContext domainContext;
    private final PathScorer cycleScorer;

    /** Registered consumers, each with the queue through which it receives senses. */
    private final Map<Consumer<LightSense>, ArrayBlockingQueue<LightSense>> consumersMap = new HashMap<>();

    /** Set on the first call to {@link #run()}; later calls return immediately. */
    private boolean executed = false;

    /**
     * Constructs a Disambiguation task that uses a unique scorer for cycles and quasi-cycles.
     *
     * @param uniqueScorer the scorer used both for cycles and for quasi-cycles
     */
    public Disambiguation(PathScorer uniqueScorer) {
        this(uniqueScorer, uniqueScorer);
    }

    /**
     * Constructs a Disambiguation task that uses different scorers for cycles and quasi-cycles.
     *
     * @param cycleScorer      the scorer passed to the solvers for cycles
     * @param quasiCycleScorer the scorer passed to the solvers for quasi-cycles
     */
    public Disambiguation(PathScorer cycleScorer, PathScorer quasiCycleScorer) {
        this.domainContext = WolDomainContext.getInstance();
        this.cycleScorer = cycleScorer;
        this.quasiCycleScorer = quasiCycleScorer;
    }

    /**
     * The main function to start the disambiguation algorithm.
     *
     * @param args no parameters.
     */
    public static void main(String[] args) {
        WolConfiguration.getInstance();
        Disambiguation disambiguation = new Disambiguation(new MarkovScorer());
        disambiguation.run();
    }

    /**
     * Loads the graph, then repeatedly distributes all its senses to a fresh set of
     * {@code SenseSolver} threads until the ratio of converged terms reaches
     * {@code CONVERGENCE_RATIO}; finally writes the edge weights back to the database.
     * <p/>
     * Only the first invocation on an instance does any work. If the calling thread is
     * interrupted while feeding or joining the solvers, the run is aborted: an end-of-stream
     * marker is sent to every consumer, the interrupt flag is restored and the database is
     * left untouched, because the in-memory scores may still be changing.
     *
     * @throws IllegalStateException if there are senses to process but no registered consumer
     */
    @Override
    public void run() {
        if (executed) {
            return;
        }
        executed = true;

        LOG.info("Start");
        long lastProgressLog = System.currentTimeMillis();

        LOG.info("Graph loading...");
        LightWolGraph lightWolGraph = new LightWolGraph(domainContext.edgesIterator());

        int iteration = 0;
        int totConvergedTerms;
        int totProcessedTerms;
        do {
            LOG.info("Iteration n." + iteration);

            Map<SenseSolver, Thread> solverThreads = startSolvers();

            totConvergedTerms = 0;
            totProcessedTerms = 0;
            int totImpossibleToDisambiguateTerms = 0;
            try {
                lastProgressLog = dispatchSenses(lightWolGraph, lastProgressLog);

                for (Map.Entry<SenseSolver, Thread> entry : solverThreads.entrySet()) {
                    entry.getValue().join();
                    SenseSolver solver = entry.getKey();
                    totConvergedTerms += solver.getConvergedTerms();
                    totProcessedTerms += solver.getProcessedTerms();
                    totImpossibleToDisambiguateTerms += solver.getImpossibleToDisambiguate();
                }
            } catch (InterruptedException e) {
                LOG.warn("Interrupted during iteration n." + iteration
                        + ", aborting without updating the database", e);
                // Must run before the finally block unregisters the solvers' queues.
                signalEndOfStream();
                Thread.currentThread().interrupt();
                return;
            } finally {
                for (SenseSolver solver : solverThreads.keySet()) {
                    removeConsumer(solver);
                }
            }
            LOG.info("Converged terms: " + totConvergedTerms + " / " + totProcessedTerms);
            LOG.info("Terms impossible to disambiguate: " + totImpossibleToDisambiguateTerms);

            iteration++;

            // With no processed terms the ratio is NaN, the comparison is false and the loop ends.
        } while ((double) totConvergedTerms / totProcessedTerms < CONVERGENCE_RATIO);

        LOG.info("The disambiguation algorithm has converged");

        LOG.info("Updating scores in the database...");
        updateDatabaseScores(lightWolGraph);
    }

    /**
     * Creates {@code PARALLELISM_DEGREE} solvers, registers each one as a consumer and starts
     * it on its own thread.
     *
     * @return the started solvers mapped to the threads running them
     */
    private Map<SenseSolver, Thread> startSolvers() {
        Map<SenseSolver, Thread> solverThreads = new HashMap<>();
        for (int i = 0; i < PARALLELISM_DEGREE; i++) {
            SenseSolver solver = new SenseSolver(cycleScorer, quasiCycleScorer);
            addConsumer(solver);
            Thread thread = new Thread(solver);
            thread.start();
            solverThreads.put(solver, thread);
        }
        return solverThreads;
    }

    /**
     * Hands every sense of the graph to the registered consumers in round-robin order, then
     * appends an end-of-stream marker to each queue.
     *
     * @param lightWolGraph   the graph whose senses are distributed
     * @param lastProgressLog time, in milliseconds, of the previous progress message
     * @return the time of the most recent progress message
     * @throws InterruptedException  if interrupted while waiting for room in a queue
     * @throws IllegalStateException if there are senses but no consumer to receive them
     */
    private long dispatchSenses(LightWolGraph lightWolGraph, long lastProgressLog)
            throws InterruptedException {
        Iterator<Integer> senseIdIterator = lightWolGraph.getSenses().iterator();
        if (consumersMap.isEmpty() && senseIdIterator.hasNext()) {
            // Without this guard the loop below would spin forever without consuming anything.
            throw new IllegalStateException(
                    "No consumers registered, check disambiguation.parallelism_degree");
        }

        int processedSenses = 0;
        while (senseIdIterator.hasNext()) {
            for (ArrayBlockingQueue<LightSense> queue : consumersMap.values()) {
                if (!senseIdIterator.hasNext()) {
                    break;
                }
                queue.put(new LightSense(senseIdIterator.next(), lightWolGraph));
                processedSenses++;
            }

            long now = System.currentTimeMillis();
            if (now - lastProgressLog > DELTA_TIME) {
                LOG.info("processed " + processedSenses + " senses");
                lastProgressLog = now;
            }
        }
        for (ArrayBlockingQueue<LightSense> queue : consumersMap.values()) {
            queue.put(new LightSense()); // eof
        }
        return lastProgressLog;
    }

    /**
     * Drops whatever is still queued and leaves a single end-of-stream marker in every
     * consumer queue, without blocking, so that consumers stop after their current element.
     */
    private void signalEndOfStream() {
        for (ArrayBlockingQueue<LightSense> queue : consumersMap.values()) {
            queue.clear();
            // Cannot fail: this thread is the only producer and the queue was just emptied.
            queue.offer(new LightSense()); // eof
        }
    }

    /**
     * Registers a consumer and gives it a new bounded input queue of {@code MAX_QUEUE_SIZE}
     * elements.
     *
     * @param consumer the consumer to register
     * @return {@code true} if the consumer was added, {@code false} if it was already registered
     */
    @Override
    public boolean addConsumer(Consumer<LightSense> consumer) {
        if (consumersMap.containsKey(consumer)) {
            return false;
        }
        ArrayBlockingQueue<LightSense> q = new ArrayBlockingQueue<>(MAX_QUEUE_SIZE);
        consumer.setInputQueue(q);
        consumersMap.put(consumer, q);
        return true;
    }

    /**
     * Writes the weight of every outgoing edge of every sense to the database, inside one
     * transaction.
     *
     * @param lightWolGraph the in-memory graph holding the computed weights
     */
    private void updateDatabaseScores(LightWolGraph lightWolGraph) {
        // NOTE(review): no rollback is attempted if an update or the commit fails.
        domainContext.getEntityManager().getTransaction().begin();
        for (int senseId : lightWolGraph.getSenses()) {
            EdgeList edgeList = lightWolGraph.getOutEdgesOf(senseId);
            for (int i = 0; i < edgeList.numOfEdges(); i++) {
                domainContext.updateScore(edgeList.getId(i), edgeList.getWeight(i));
            }
        }
        domainContext.getEntityManager().getTransaction().commit();
    }

    /**
     * Unregisters a consumer.
     *
     * @param consumer the consumer to remove
     * @return {@code true} if the consumer was registered, {@code false} otherwise
     */
    @Override
    public boolean removeConsumer(Consumer<LightSense> consumer) {
        // Stored queues are never null, so a non-null result means the key was present.
        return consumersMap.remove(consumer) != null;
    }

}