de.tudarmstadt.ukp.lmf.transform.wordnet.WNConverter.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.lmf.transform.wordnet.WNConverter.java

Source

/**
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.tudarmstadt.ukp.lmf.transform.wordnet;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.LinkedList;

import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.dictionary.Dictionary;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.lmf.model.core.GlobalInformation;
import de.tudarmstadt.ukp.lmf.model.core.LexicalResource;
import de.tudarmstadt.ukp.lmf.model.core.Lexicon;
import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier;

/**
 * Converts
 * <a href="https://wordnet.princeton.edu/wordnet/">WordNet 3.0</a>
 * to the LMF format.
 * <p>
 * Typical usage: construct an instance, call {@link #toLMF()} and then obtain the
 * filled resource via {@link #getLexicalResource()}.
 *
 * @author Zijad Maksuti
 * @author Judith Eckle-Kohler
 */
public class WNConverter {

    /** Classpath location of the subcategorization frame mapping file. */
    private static final String SUBCAT_MAPPING_RESOURCE = "WordNetSubcatMappings/wnFrameMapping.txt";

    private final Dictionary extWordnet; // extWordNet Dictionary
    protected File dictionaryPath;

    private final LexicalResource lexicalResource;

    // Subcat mapping file, handed to SubcategorizationFrameExtractor in toLMF().
    // NOTE(review): this class never closes the stream; whether the extractor
    // closes it is not visible here -- confirm to rule out a resource leak.
    private final InputStream subcatStream;

    private final String dtdVersion;
    private final String resourceVersion;

    private final Log LOG = LogFactory.getLog(getClass());

    /**
     * Constructs a {@link WNConverter} based on the consumed parameters.
     *
     * @param dictionaryPath the path of the WordNet dictionary files
     * @param wordNet initialized WordNet's {@link Dictionary} object
     * @param lexicalResource initialized object of {@link LexicalResource}, which will be filled with WordNet's data
     * @param resourceVersion version of this resource
     * @param dtd specifies the version of the .dtd which will be written to lexicalResource
     * @throws IllegalStateException if the subcat mapping file cannot be found on the classpath or cannot be opened
     */
    public WNConverter(final File dictionaryPath, final Dictionary wordNet, final LexicalResource lexicalResource,
            final String resourceVersion, final String dtd) {
        this.dictionaryPath = dictionaryPath;
        this.extWordnet = wordNet;
        this.lexicalResource = lexicalResource;
        this.resourceVersion = resourceVersion;
        this.dtdVersion = dtd;
        this.subcatStream = openSubcatMappingStream();
    }

    /**
     * Constructs a {@link WNConverter}; the {@code exMappingPath} argument is ignored.
     *
     * @param dictionaryPath the path of the WordNet dictionary files
     * @param wordNet initialized WordNet's {@link Dictionary} object
     * @param lexicalResource initialized object of {@link LexicalResource}, which will be filled with WordNet's data
     * @param resourceVersion version of this resource
     * @param dtd specifies the version of the .dtd which will be written to lexicalResource
     * @param exMappingPath unused; retained only for source compatibility
     * @deprecated Use {@link #WNConverter(File, Dictionary, LexicalResource, String, String)} instead!
     */
    @Deprecated
    public WNConverter(File dictionaryPath, Dictionary wordNet, LexicalResource lexicalResource,
            String resourceVersion, String dtd, String exMappingPath) {
        this(dictionaryPath, wordNet, lexicalResource, resourceVersion, dtd);
    }

    /**
     * Opens the subcat mapping file from the classpath of this object's class loader.
     *
     * @return an open stream on the mapping file
     * @throws IllegalStateException if the resource is missing or cannot be opened
     */
    private InputStream openSubcatMappingStream() {
        final URL mappingUrl = getClass().getClassLoader().getResource(SUBCAT_MAPPING_RESOURCE);
        if (mappingUrl == null) {
            // getResource signals "not found" with null rather than an exception
            throw new IllegalStateException(
                    "Unable to load subcat mapping file: resource '" + SUBCAT_MAPPING_RESOURCE
                            + "' was not found on the classpath");
        }
        try {
            return mappingUrl.openStream();
        } catch (IOException e) {
            // Fail the construction instead of terminating the whole JVM, and keep the cause
            throw new IllegalStateException(
                    "Unable to load subcat mapping file '" + SUBCAT_MAPPING_RESOURCE + "'", e);
        }
    }

    /**
     * Converts the information provided by the initialized WordNet-{@link Dictionary} instance to LMF-format. <br>
     * The result of the conversion can be obtained by calling {@link WNConverter#getLexicalResource()}
     *
     * @throws RuntimeException if a {@link JWNLException} occurs during the conversion
     */
    public void toLMF() {
        try {
            LOG.info("Started converting WordNet to LMF...");
            SubcategorizationFrameExtractor subcategorizationFrameExtractor = new SubcategorizationFrameExtractor(
                    subcatStream);

            // Setting attributes of LexicalResource
            lexicalResource.setName("WordNet");
            lexicalResource.setDtdVersion(dtdVersion);

            // *** Setting GlobalInformation *** //
            GlobalInformation globalInformation = new GlobalInformation();
            globalInformation.setLabel("LMF representation of WordNet 3.0");
            lexicalResource.setGlobalInformation(globalInformation);

            //*** Setting Lexicon (only one since WordNet is monolingual)***//
            Lexicon lexicon = new Lexicon();
            lexicon.setLanguageIdentifier(ELanguageIdentifier.ENGLISH);
            lexicon.setId("WN_Lexicon_0");
            lexicon.setName("WordNet");
            LinkedList<Lexicon> lexicons = new LinkedList<Lexicon>();
            lexicons.add(lexicon);
            lexicalResource.setLexicons(lexicons);

            // *** Creating Synsets *** //
            LOG.info("Generating Synsets...");
            SynsetGenerator synsetGenerator = new SynsetGenerator(extWordnet, resourceVersion);
            synsetGenerator.initialize();
            // Setting Synsets
            lexicon.setSynsets(synsetGenerator.getSynsets());
            LOG.info("Generating Synsets done");

            // *** Creating LexicalEntries *** //
            LOG.info("Generating LexicalEntries...");
            LexicalEntryGenerator lexicalEntryGenerator = new LexicalEntryGenerator(dictionaryPath, extWordnet,
                    synsetGenerator, subcategorizationFrameExtractor, resourceVersion);
            lexicon.setLexicalEntries(lexicalEntryGenerator.getLexicalEntries());
            LOG.info("Generating LexicalEntries done");

            // *** Creating SynsetRelations *** //
            LOG.info("Generating SynsetRelations...");
            SynsetRelationGenerator synsetRelationGenerator = new SynsetRelationGenerator(synsetGenerator,
                    lexicalEntryGenerator);
            // Update the relations of previously extracted (and generated) Synsets
            synsetRelationGenerator.updateSynsetRelations();
            LOG.info("Generating SynsetRelations done");

            // *** Creating RelatedForms of LexicalEntries *** //
            LOG.info("Generating RelatedForms...");
            RelatedFormGenerator relatedFormGenerator = new RelatedFormGenerator(lexicalEntryGenerator);
            relatedFormGenerator.updateRelatedForms();
            LOG.info("Generating RelatedForms done");

            // *** Creating SenseRelations *** //
            LOG.info("Generating SenseRelations...");
            SenseRelationGenerator senseRelationGenerator = new SenseRelationGenerator(lexicalEntryGenerator);
            senseRelationGenerator.updateSenseRelations();
            LOG.info("Generating SenseRelations done");

            // *** Setting SubcategorizationFrames ***//
            lexicon.setSubcategorizationFrames(subcategorizationFrameExtractor.getSubcategorizationFrames());
            // setting SemanticPredicates
            lexicon.setSemanticPredicates(subcategorizationFrameExtractor.getSemanticPredicates());
            // setting SynSemCorrespondences
            lexicon.setSynSemCorrespondences(subcategorizationFrameExtractor.getSynSemCorrespondences());
        } catch (JWNLException e) {
            throw new RuntimeException("UBY-LMF creation failed", e);
        }
    }

    /**
     * Returns the {@link LexicalResource} object, which contains the results of the conversion
     * @return an instance of LexicalResource, which contains the results of the conversion
     */
    public LexicalResource getLexicalResource() {
        return this.lexicalResource;
    }

}