de.tudarmstadt.ukp.uby.integration.alignment.xml.transform.sensealignments.VnWnSenseAlignmentXml.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.uby.integration.alignment.xml.transform.sensealignments.VnWnSenseAlignmentXml.java

Source

/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitt Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.uby.integration.alignment.xml.transform.sensealignments;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.integration.alignment.xml.model.Alignments;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decision;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decisiontype;
import de.tudarmstadt.ukp.integration.alignment.xml.model.ResourceXml;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Source;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Target;
import de.tudarmstadt.ukp.integration.alignment.xml.model.XmlMeta;

/**
 * Convert given VerbNet to WordNet alignment file to generic alignment xml
 */
public class VnWnSenseAlignmentXml extends SenseAlignmentXml {

    private final Log logger = LogFactory.getLog(getClass());

    public VnWnSenseAlignmentXml(String alignmentFile, String outFile) {
        super(alignmentFile, outFile);
    }

    @Override
    public void toAlignmentXml(XmlMeta metadata) {
        String decisionSrc = metadata.decisiontypes.get(0).id;
        BufferedReader reader = null;
        List<Source> sources = new ArrayList<>();
        try {
            System.out.println(alignmentFile);
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(alignmentFile))));
            String line = null;
            int lineNumber = 0;
            int paircount = 0;
            while ((line = reader.readLine()) != null) {

                String temp[] = line.split("#");
                String verbnetItem = temp[0]; // VerbNet_3.2_eng_sense
                String wordNetItem = temp[1];

                wordNetItem = wordNetItem.substring(4);
                wordNetItem = wordNetItem.replace(")", "").trim();

                if (wordNetItem.length() != 0) {
                    // create the source object
                    Source source = new Source();
                    source.ref = verbnetItem;
                    List<Target> targets = new ArrayList<>();

                    // elements separated by whitespace
                    String wordNetItems[] = wordNetItem.split(" ");

                    for (String wordnet : wordNetItems) {
                        if (wordnet.trim().length() > 0) {
                            // format to match wordnet part of speech and sense
                            // key
                            wordnet = wordnet.replaceAll("\\?", "");

                            String refString = "[POS: verb] ";
                            String wordnetref = refString + wordnet + "::";

                            Target target = new Target();
                            target.ref = wordnetref;
                            target.decision = new Decision();
                            target.decision.confidence = DEFAULTCONFIDENCE;
                            target.decision.src = decisionSrc;
                            target.decision.value = true; // default
                            targets.add(target);
                            paircount++;
                        }
                    }
                    source.targets = targets;
                    sources.add(source);
                } else {
                    logString.append("No alignment target in input for: " + verbnetItem);
                    logString.append(LF);
                }
                lineNumber++;
            }
            logger.info(logString.toString());
            logger.info("number of input lines:" + lineNumber);
            logger.info("number of alignment pairs:" + paircount);

            writer.writeMetaData(metadata);
            Alignments alignments = new Alignments();
            alignments.source = sources;
            writer.writeAlignments(alignments);
            writer.close();
            reader.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    @Override
    public XmlMeta getDefaultXmlMeta() {
        /*
         * Generate Metadata
         */
        XmlMeta metadata = new XmlMeta();
        metadata.title = "VerbNet-WordNet mapping from VerbNet version 3.2";
        metadata.creator = "Kipper et al., A large-scale Classification of English Verbs, LRE Journal, 42(1), 2008";
        metadata.date = "2015-03-12"; // download date
        metadata.description = "Manual mapping of VerbNet class members to WordNet sense keys, the mapping is part of the VerbNet";
        metadata.identifier = "VNWN32";
        metadata.publisher = "University of Colorado";
        metadata.rights = "VerbNet 3.0 (and 3.x) License";
        metadata.version = "3.2";
        ResourceXml sourceResource = new ResourceXml();
        sourceResource.description = "VerbNet version 3.2";
        sourceResource.id = "VN_Lexicon_0"; // matches UBY lexiconId
        sourceResource.language = "en";
        // identifiertype needs to match externalSystem in UBY:
        sourceResource.identifiertype = "VerbNet_3.2_eng_sense";
        metadata.sourceResource = sourceResource;
        ResourceXml targetResource = new ResourceXml();
        targetResource.description = "WordNet version 3.x";
        targetResource.id = "WN_Lexicon_0"; // matches UBY lexiconId
        targetResource.language = "en";
        // identifiertype needs to match externalSystem in UBY:
        targetResource.identifiertype = "WordNet 3.0 part of speech and sense key";
        metadata.targetResource = targetResource;
        Decisiontype type = new Decisiontype();
        type.id = "VerbNet_VNWN";
        type.name = "VerbNet 3.1 VNWN";
        type.type = Decisiontype.Decision.MANUAL;
        List<Decisiontype> decisionTypes = new ArrayList<>();
        decisionTypes.add(type);
        metadata.decisiontypes = decisionTypes;
        // no separate scores given => no scoretype information
        return metadata;
    }

    public static void main(String[] args) {
        String UBY_HOME = System.getenv("UBY_HOME");
        String alfile = UBY_HOME + "/VerbNet/verbNetWordNetAlignment3.2.srt";
        String outFile = UBY_HOME + "/target/verbNetWordNetAlignment3.2_gen.xml";
        VnWnSenseAlignmentXml al = new VnWnSenseAlignmentXml(alfile, outFile);
        al.toAlignmentXml(al.getDefaultXmlMeta());
    }
}