org.dragoneronca.nlp.wol.graph_building.FirstSenseGraphBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.dragoneronca.nlp.wol.graph_building.FirstSenseGraphBuilder.java

Source

/*
 * Copyright Paolo Dragone 2014.
 * Copyright Alessandro Ronca 2014.
 *
 * This file is part of Wiktionary Ontology.
 *
 * Wiktionary Ontology is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Wiktionary Ontology is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Wiktionary Ontology. If not, see <http://www.gnu.org/licenses/>.
 */

package org.dragoneronca.nlp.wol.graph_building;

import com.google.common.collect.Sets;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.dragoneronca.nlp.wol.WolConfiguration;
import org.dragoneronca.nlp.wol.domain.RangedSenseIteratorFactory;
import org.dragoneronca.nlp.wol.domain.RangedSenseScanner;
import org.dragoneronca.nlp.wol.domain.WolDomainContext;
import org.dragoneronca.nlp.wol.domain.entities.SemanticEdge;
import org.dragoneronca.nlp.wol.domain.entities.Sense;
import org.dragoneronca.nlp.wol.domain.entities.TaggedTerm;
import org.dragoneronca.nlp.wol.domain.entities.TermSemanticEdge;
import org.dragoneronca.nlp.wol.tagger.WolTagger;
import org.dragoneronca.util.nlp.Language;
import org.dragoneronca.util.nlp.POS;

import javax.persistence.EntityManager;
import java.util.*;

/**
 * This class is able to build a WolGraph with only the edges pointing to the first sense of a
 * term.
 * <p/>
 * The order between senses is the one defined by Wiktionary; in this class it is read from
 * {@link Sense#getNumber()}, the lowest number being the first sense.
 *
 * @author Paolo Dragone
 * @author Alessandro Ronca
 */
public class FirstSenseGraphBuilder implements Runnable {

    private static final WolTagger TAGGER = WolTagger.getInstance();

    /** Probability assigned to every edge created by this builder. */
    private static final double MAX_PROBABILITY = 1.0;

    /**
     * Bunch size handed both to the {@link RangedSenseScanner} and to the sense iterator. Read
     * from the "environment" configuration, see the static initializer.
     */
    private static final int SENSES_BUNCH_SIZE;

    /** Fallback for {@link #SENSES_BUNCH_SIZE} when the configuration key is missing. */
    private static final int DEFAULT_SENSES_BUNCH_SIZE = 100000;

    /** Orders senses by ascending sense number, so that the minimum is the first sense. */
    private static final Comparator<Sense> BY_SENSE_NUMBER = new Comparator<Sense>() {
        @Override
        public int compare(Sense sense1, Sense sense2) {
            return Integer.compare(sense1.getNumber(), sense2.getNumber());
        }
    };

    static {
        PropertiesConfiguration properties = WolConfiguration.getInstance().getConfiguration("environment");

        SENSES_BUNCH_SIZE = properties.getInt("first_sense_graph_builder.senses_bunch_size",
                DEFAULT_SENSES_BUNCH_SIZE);
    }

    private final EntityManager entityManager;

    /** Set on the first call of {@link #run()}; later calls are no-ops. */
    private boolean executed = false;

    /**
     * It constructs a <tt>FirstSenseGraphBuilder</tt> object.
     * <p/>
     * The entity manager is obtained from the {@link WolDomainContext} singleton.
     */
    public FirstSenseGraphBuilder() {
        this.entityManager = WolDomainContext.getInstance().getEntityManager();
    }

    /**
     * The main function to start the graph building algorithm.
     *
     * @param args no parameters.
     */
    public static void main(String[] args) {
        WolConfiguration.getInstance();
        FirstSenseGraphBuilder graphBuilder = new FirstSenseGraphBuilder();
        graphBuilder.run();
    }

    /**
     * Builds the edges. The work is performed only the first time this method is invoked on an
     * instance; any further invocation returns immediately.
     * <p/>
     * For every alphabetic range produced by the {@link RangedSenseScanner}, all the senses
     * returned by {@link WolDomainContext#senseIterator} are visited and linked to the first
     * sense of each gloss term whose lemma falls in that range.
     */
    @Override
    public void run() {
        if (executed) {
            return;
        }
        executed = true;

        RangedSenseScanner rangedSenseScanner = new RangedSenseScanner(SENSES_BUNCH_SIZE);
        while (rangedSenseScanner.hasNext()) {
            RangedSenseIteratorFactory rangedSenseIteratorFactory = rangedSenseScanner.next();
            RangedSenseScanner.AlphabeticRange alphabeticRange = rangedSenseIteratorFactory.getRange();

            // A fresh iterator over the senses is requested for each range.
            Iterator<Sense> senseIterator = WolDomainContext.getInstance().senseIterator(SENSES_BUNCH_SIZE);
            while (senseIterator.hasNext()) {
                linkSenseFromTerms(senseIterator.next(), alphabeticRange, rangedSenseIteratorFactory);
            }
        }
    }

    /**
     * Tags the gloss of the given sense and, for each tagged term whose lemma is inside the given
     * range, creates and persists an edge from the sense to the first sense of that term.
     * <p/>
     * Terms outside the range and terms for which no target sense is found are skipped; the
     * remaining terms of the gloss are still processed.
     *
     * @param sense                      the origin sense.
     * @param alphabeticRange            the range of lemmas handled in the current pass.
     * @param rangedSenseIteratorFactory the factory used to look up the candidate target senses.
     */
    private void linkSenseFromTerms(Sense sense, RangedSenseScanner.AlphabeticRange alphabeticRange,
            RangedSenseIteratorFactory rangedSenseIteratorFactory) {

        TAGGER.tagSenses(Collections.singletonList(sense).iterator());

        for (TaggedTerm taggedTerm : sense.getTaggedGloss()) {

            if (!alphabeticRange.isInRange(taggedTerm.getLemma())) {
                continue;
            }

            Set<Sense> targetSenses = getTargetSenses(sense, Language.EN, taggedTerm.getPOS(),
                    taggedTerm.getLemma(), rangedSenseIteratorFactory);

            if (targetSenses.isEmpty()) {
                // Only this term is unresolvable: move on to the next one instead of leaving the
                // method, otherwise the following terms of the gloss would never get an edge.
                continue;
            }

            Sense firstSense = Collections.min(targetSenses, BY_SENSE_NUMBER);

            TermSemanticEdge semanticEdge = new TermSemanticEdge(firstSense, MAX_PROBABILITY, taggedTerm);
            semanticEdge.setOriginSense(sense);

            // The set of outgoing edges may not be initialized yet.
            Set<SemanticEdge> outEdges = sense.getOutEdges();
            if (outEdges == null) {
                outEdges = new HashSet<>();
                sense.setOutEdges(outEdges);
            }
            outEdges.add(semanticEdge);

            // NOTE(review): no transaction is opened here; presumably it is managed elsewhere,
            // to be confirmed.
            entityManager.persist(semanticEdge);
        }
    }

    /**
     * Collects the senses the given word may refer to. Underscores in the word are replaced by
     * spaces before the lookup.
     * <p/>
     * When the origin sense has a context and at least one candidate shares it, only the
     * candidates with that same context are returned; otherwise all the candidates are returned.
     *
     * @param originSense                the sense whose gloss contains the word.
     * @param lang                       the language of the target senses.
     * @param pos                        the part of speech of the target senses.
     * @param targetWord                 the word to look up.
     * @param rangedSenseIteratorFactory the factory used for the lookup.
     * @return the candidate target senses, possibly empty but never <tt>null</tt>.
     */
    private Set<Sense> getTargetSenses(Sense originSense, Language lang, POS pos, String targetWord,
            RangedSenseIteratorFactory rangedSenseIteratorFactory) {
        Iterator<Sense> senseIterator = rangedSenseIteratorFactory.senseIterator(targetWord.replace('_', ' '), lang,
                pos);
        Set<Sense> referredSenses = Sets.newHashSet(senseIterator);

        String originContext = originSense.getContext();
        if (originContext != null) {
            Set<Sense> sameContextReferredSenses = retainByContext(originContext, referredSenses);
            if (!sameContextReferredSenses.isEmpty()) {
                return sameContextReferredSenses;
            }
        }
        return referredSenses;
    }

    /**
     * Selects the senses having exactly the given context. The input set is left untouched.
     *
     * @param context the context to match.
     * @param senses  the senses to filter.
     * @return a new set with the senses whose context is non-null and equal to the given one.
     */
    private Set<Sense> retainByContext(String context, Set<Sense> senses) {
        Set<Sense> retainedSenses = new HashSet<>();
        for (Sense sense : senses) {
            String senseContext = sense.getContext();
            if (senseContext != null && senseContext.equals(context)) {
                retainedSenses.add(sense);
            }
        }
        return retainedSenses;
    }

}