Java tutorial
/* * Copyright Paolo Dragone 2014. * Copyright Alessandro Ronca 2014. * * This file is part of Wiktionary Ontology. * * Wiktionary Ontology is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Wiktionary Ontology is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Wiktionary Ontology. If not, see <http://www.gnu.org/licenses/>. */ package org.dragoneronca.nlp.wol.graph_building; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.log4j.Logger; import org.dragoneronca.nlp.wol.WolConfiguration; import org.dragoneronca.nlp.wol.domain.RangedSenseIteratorFactory; import org.dragoneronca.nlp.wol.domain.RangedSenseScanner; import org.dragoneronca.nlp.wol.domain.WolDomainContext; import org.dragoneronca.nlp.wol.domain.entities.SenseSet; import javax.persistence.EntityManager; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Set; /** * This task iterates over all the sense sets, POS-tagging content texts (glosses and sense * references) and creating semantic edges with an initial disambiguation probability based on * syntactic and semantic contexts. * * @author Paolo Dragone * @author Alessandro Ronca */ public class WolGraphBuilder implements Runnable { private static final Logger LOG = Logger.getLogger(WolGraphBuilder.class); private static final int DEFAULT_SENSESETS_BUNCH_SIZE = 50000; private static final int DEFAULT_SENSES_BUNCH_SIZE = 100000; private static final int DEFAULT_DELTA_TIME = 10 * 1000; private static final int DEFAULT_COMMIT_RATE = 100; private static final int SENSESETS_BUNCH_SIZE; private static final int SENSES_BUNCH_SIZE; private static final int DELTA_TIME; private static final int COMMIT_RATE; private final GraphType graphType; static { PropertiesConfiguration properties = WolConfiguration.getInstance().getConfiguration("environment"); SENSESETS_BUNCH_SIZE = properties.getInt("wol_graph_builder.sensesets_bunch_size", DEFAULT_SENSESETS_BUNCH_SIZE); SENSES_BUNCH_SIZE = properties.getInt("wol_graph_builder.senses_bunch_size", DEFAULT_SENSES_BUNCH_SIZE); DELTA_TIME = properties.getInt("wol_graph_builder.delta_time", DEFAULT_DELTA_TIME); COMMIT_RATE = properties.getInt("wol_graph_builder.commit_rate", DEFAULT_COMMIT_RATE); } private boolean executed = false; /** * Default graph builder. */ public WolGraphBuilder() { this.executed = false; this.graphType = GraphType.SIMILARITY_BASED; } /** * It constructs a graph builder according to a specified computation. * * @param graphType the graph type you want to build. */ public WolGraphBuilder(GraphType graphType) { this.executed = false; this.graphType = graphType; } /** * The main function to start the graph building algorithm. * * @param args no parameters. */ public static void main(String[] args) { WolConfiguration.getInstance(); WolGraphBuilder graphBuilder = new WolGraphBuilder(); graphBuilder.run(); } @Override public void run() { if (executed) { return; } executed = true; LOG.info("Start"); WolDomainContext domainContext = WolDomainContext.getInstance(); EntityManager entityManager = domainContext.getEntityManager(); Set<SenseSet> senseSetsBunch; Iterator<SenseSet> senseSetIterator = domainContext.senseSetIterator(SENSESETS_BUNCH_SIZE); while (!(senseSetsBunch = getBunchOfSenseSets(senseSetIterator, SENSESETS_BUNCH_SIZE)).isEmpty()) { RangedSenseScanner rangedSenseScanner = new RangedSenseScanner(SENSES_BUNCH_SIZE); while (rangedSenseScanner.hasNext()) { RangedSenseIteratorFactory rangedSenseIteratorFactory = rangedSenseScanner.next(); RangedSenseScanner.AlphabeticRange alphabeticRange = rangedSenseIteratorFactory.getRange(); entityManager.getTransaction().begin(); SenseSetProcessor processor = null; switch (graphType) { case SIMILARITY_BASED: processor = new SimilarityBasedProcessor(rangedSenseIteratorFactory, alphabeticRange); break; } int processed = 0; long startTime = System.currentTimeMillis(); for (SenseSet senseSet : senseSetsBunch) { processor.processSenseSet(senseSet); processed++; long endTime = System.currentTimeMillis(); if (endTime - startTime > DELTA_TIME) { LOG.info("SenseSet processed: " + processed); startTime = endTime; } if (processed % COMMIT_RATE == 0) { entityManager.getTransaction().commit(); entityManager.getTransaction().begin(); } } entityManager.getTransaction().commit(); Runtime.getRuntime().gc(); } entityManager.clear(); Runtime.getRuntime().gc(); } } private Set<SenseSet> getBunchOfSenseSets(Iterator<SenseSet> iterator, int max) { Set<SenseSet> senseSets = new LinkedHashSet<>(); while (senseSets.size() <= max && iterator.hasNext()) { senseSets.add(iterator.next()); } return senseSets; } /** * A set of possible computations. */ public static enum GraphType { SIMILARITY_BASED } }