// Java tutorial
/*
 * Copyright Paolo Dragone 2014.
 * Copyright Alessandro Ronca 2014.
 *
 * This file is part of Wiktionary Ontology.
 *
 * Wiktionary Ontology is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Wiktionary Ontology is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Wiktionary Ontology. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dragoneronca.nlp.wol.disambiguation;

import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.log4j.Logger;
import org.dragoneronca.nlp.wol.WolConfiguration;
import org.dragoneronca.nlp.wol.domain.WolDomainContext;
import org.dragoneronca.util.concurrent.Consumer;
import org.dragoneronca.util.concurrent.Producer;
import org.dragoneronca.util.graphs.PathScorer;
import org.dragoneronca.util.graphs.scorers.MarkovScorer;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;

/**
 * This class orchestrates the execution of the disambiguation algorithm, iterating over the
 * senses and delivering them to multiple {@code SenseSolver} objects (according to the
 * parallelism degree).
 * <p>
 * Data are read from the database at the beginning, stored in main memory and elaborated. Only at
 * the end is the database updated, in a single transaction.
 * <p>
 * Instances are not synchronized: {@link #run()}, {@link #addConsumer(Consumer)} and
 * {@link #removeConsumer(Consumer)} are expected to be called from one thread.
 *
 * @author Paolo Dragone
 * @author Alessandro Ronca
 */
public class Disambiguation implements Runnable, Producer<LightSense> {

    /** Number of {@code SenseSolver} threads started for every iteration. */
    public static final int PARALLELISM_DEGREE;

    private static final Logger LOG = Logger.getLogger(Disambiguation.class);

    /** Minimum time, in milliseconds, between two progress log messages. */
    private static final int DELTA_TIME;
    /** Ratio of converged over processed terms at which the iterations stop. */
    private static final double CONVERGENCE_RATIO;
    /** Capacity of the queue handed to every consumer. */
    private static final int MAX_QUEUE_SIZE;

    static {
        PropertiesConfiguration properties =
                WolConfiguration.getInstance().getConfiguration("environment");
        DELTA_TIME = properties.getInt("disambiguation.delta_time");
        CONVERGENCE_RATIO = properties.getDouble("disambiguation.convergence_ratio");
        MAX_QUEUE_SIZE = properties.getInt("disambiguation.max_queue_size");
        PARALLELISM_DEGREE = properties.getInt("disambiguation.parallelism_degree");
    }

    private final PathScorer quasiCycleScorer;
    private final WolDomainContext domainContext;
    private final PathScorer cycleScorer;
    private final Map<Consumer<LightSense>, ArrayBlockingQueue<LightSense>> consumersMap =
            new HashMap<>();

    private boolean executed = false;
    /** Timestamp (ms) of the last progress message; kept across iterations. */
    private long lastProgressLogTime;

    /**
     * Constructs a Disambiguation task that uses a unique scorer for cycles and quasi-cycles.
     *
     * @param uniqueScorer the scorer used for both cycles and quasi-cycles
     */
    public Disambiguation(PathScorer uniqueScorer) {
        this(uniqueScorer, uniqueScorer);
    }

    /**
     * Constructs a Disambiguation task that uses different scorers for cycles and quasi-cycles.
     *
     * @param cycleScorer      the scorer handed to the solvers for cycles
     * @param quasiCycleScorer the scorer handed to the solvers for quasi-cycles
     */
    public Disambiguation(PathScorer cycleScorer, PathScorer quasiCycleScorer) {
        this.domainContext = WolDomainContext.getInstance();
        this.cycleScorer = cycleScorer;
        this.quasiCycleScorer = quasiCycleScorer;
    }

    /**
     * The main function to start the disambiguation algorithm.
     *
     * @param args no parameters.
     */
    public static void main(String[] args) {
        WolConfiguration.getInstance();
        Disambiguation disambiguation = new Disambiguation(new MarkovScorer());
        disambiguation.run();
    }

    /**
     * Loads the graph, repeats disambiguation iterations until the convergence ratio is reached
     * and finally writes the resulting scores to the database.
     * <p>
     * Only the first invocation does any work; later invocations return immediately. If the
     * calling thread is interrupted, the solver threads are interrupted, the interrupt flag of
     * the calling thread is restored and the database is <em>not</em> updated.
     *
     * @throws IllegalStateException if there are senses to process but no consumer is registered
     */
    @Override
    public void run() {
        if (executed) {
            return;
        }
        executed = true;

        LOG.info("Start");
        lastProgressLogTime = System.currentTimeMillis();

        LOG.info("Graph loading...");
        LightWolGraph lightWolGraph = new LightWolGraph(domainContext.edgesIterator());

        try {
            int iteration = 0;
            boolean converged;
            do {
                LOG.info("Iteration n." + iteration);
                converged = runIteration(lightWolGraph);
                iteration++;
            } while (!converged);
        } catch (InterruptedException e) {
            // The exception itself was already logged where it was raised. Restore the flag for
            // the caller and stop here: committing the scores of an unfinished run would
            // persist partial results as if the algorithm had converged.
            Thread.currentThread().interrupt();
            LOG.warn("Disambiguation interrupted before convergence; "
                    + "the scores in the database were not updated");
            return;
        }

        LOG.info("The disambiguation algorithm has converged");
        LOG.info("Updating scores in the database...");
        updateDatabaseScores(lightWolGraph);
    }

    /**
     * Registers a consumer and hands it a fresh bounded input queue.
     *
     * @param consumer the consumer to register
     * @return {@code true} if the consumer was registered, {@code false} if it already was
     */
    @Override
    public boolean addConsumer(Consumer<LightSense> consumer) {
        if (consumersMap.containsKey(consumer)) {
            return false;
        }
        ArrayBlockingQueue<LightSense> queue = new ArrayBlockingQueue<>(MAX_QUEUE_SIZE);
        consumer.setInputQueue(queue);
        consumersMap.put(consumer, queue);
        return true;
    }

    /**
     * Unregisters a consumer.
     *
     * @param consumer the consumer to unregister
     * @return {@code true} if the consumer was registered, {@code false} otherwise
     */
    @Override
    public boolean removeConsumer(Consumer<LightSense> consumer) {
        // Registered queues are never null, so a non-null result means "was present".
        return consumersMap.remove(consumer) != null;
    }

    /**
     * Runs one iteration: starts the solvers, feeds them every sense and collects their counters.
     * The solvers started here are always unregistered before returning.
     *
     * @return {@code true} when the iterations must stop
     * @throws InterruptedException if the calling thread is interrupted while feeding or joining
     */
    private boolean runIteration(LightWolGraph lightWolGraph) throws InterruptedException {
        Map<SenseSolver, Thread> workers = startSolvers();
        try {
            feedSenses(lightWolGraph);
            return awaitSolvers(workers);
        } catch (InterruptedException e) {
            // Best effort: ask the solver threads to stop instead of leaving them behind.
            // NOTE(review): this relies on SenseSolver reacting to interruption - confirm.
            for (Thread thread : workers.values()) {
                thread.interrupt();
            }
            throw e;
        } finally {
            for (SenseSolver solver : workers.keySet()) {
                removeConsumer(solver);
            }
        }
    }

    /** Creates, registers and starts {@link #PARALLELISM_DEGREE} solver threads. */
    private Map<SenseSolver, Thread> startSolvers() {
        Map<SenseSolver, Thread> workers = new HashMap<>();
        for (int i = 0; i < PARALLELISM_DEGREE; i++) {
            SenseSolver solver = new SenseSolver(cycleScorer, quasiCycleScorer);
            addConsumer(solver);
            Thread thread = new Thread(solver);
            thread.start();
            workers.put(solver, thread);
        }
        return workers;
    }

    /**
     * Distributes all the senses of the graph round-robin over the registered queues, then puts
     * an end-of-stream marker (an empty {@code LightSense}) on every queue.
     */
    private void feedSenses(LightWolGraph lightWolGraph) throws InterruptedException {
        Iterator<Integer> senseIdIterator = lightWolGraph.getSenses().iterator();
        if (consumersMap.isEmpty() && senseIdIterator.hasNext()) {
            // Without this check the loop below would spin forever without consuming a sense.
            throw new IllegalStateException(
                    "No consumer registered: check disambiguation.parallelism_degree ("
                            + PARALLELISM_DEGREE + ")");
        }

        int processedSenses = 0;
        try {
            while (senseIdIterator.hasNext()) {
                for (ArrayBlockingQueue<LightSense> queue : consumersMap.values()) {
                    if (!senseIdIterator.hasNext()) {
                        break;
                    }
                    queue.put(new LightSense(senseIdIterator.next(), lightWolGraph));
                    processedSenses++;
                }

                long now = System.currentTimeMillis();
                if (now - lastProgressLogTime > DELTA_TIME) {
                    LOG.info("processed " + processedSenses + " senses");
                    lastProgressLogTime = now;
                }
            }

            for (ArrayBlockingQueue<LightSense> queue : consumersMap.values()) {
                queue.put(new LightSense()); // eof
            }
        } catch (InterruptedException e) {
            LOG.warn("Exception while producing an element", e);
            throw e;
        }
    }

    /**
     * Waits for every solver thread to finish, sums their counters and logs the totals.
     *
     * @return {@code true} when the iterations must stop, i.e. when the ratio of converged over
     * processed terms is not below the convergence ratio
     */
    private boolean awaitSolvers(Map<SenseSolver, Thread> workers) throws InterruptedException {
        int totConvergedTerms = 0;
        int totProcessedTerms = 0;
        int totImpossibleToDisambiguateTerms = 0;

        try {
            for (Map.Entry<SenseSolver, Thread> worker : workers.entrySet()) {
                worker.getValue().join();
                SenseSolver solver = worker.getKey();
                totConvergedTerms += solver.getConvergedTerms();
                totProcessedTerms += solver.getProcessedTerms();
                totImpossibleToDisambiguateTerms += solver.getImpossibleToDisambiguate();
            }
        } catch (InterruptedException e) {
            LOG.warn("Exception while joining", e);
            throw e;
        }

        LOG.info("Converged terms: " + totConvergedTerms + " / " + totProcessedTerms);
        LOG.info("Terms impossible to disambiguate: " + totImpossibleToDisambiguateTerms);

        // Written as a negated "<" on purpose: when no term was processed the ratio is NaN,
        // every comparison with NaN is false, and the iterations stop instead of looping forever.
        return !((double) totConvergedTerms / totProcessedTerms < CONVERGENCE_RATIO);
    }

    /**
     * Writes the weight of every outgoing edge of every sense to the database inside a single
     * transaction. If an update or the commit fails, a still-active transaction is rolled back
     * before the exception is propagated.
     */
    private void updateDatabaseScores(LightWolGraph lightWolGraph) {
        domainContext.getEntityManager().getTransaction().begin();
        try {
            for (int senseId : lightWolGraph.getSenses()) {
                EdgeList edgeList = lightWolGraph.getOutEdgesOf(senseId);
                for (int i = 0; i < edgeList.numOfEdges(); i++) {
                    domainContext.updateScore(edgeList.getId(i), edgeList.getWeight(i));
                }
            }
            domainContext.getEntityManager().getTransaction().commit();
        } catch (RuntimeException e) {
            // NOTE(review): assumes getTransaction() exposes isActive()/rollback() as the JPA
            // EntityTransaction does - confirm against WolDomainContext.
            if (domainContext.getEntityManager().getTransaction().isActive()) {
                domainContext.getEntityManager().getTransaction().rollback();
            }
            throw e;
        }
    }
}