de.tudarmstadt.ukp.lmf.transform.sensealignments.FrameNetWiktionaryAlignment.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.lmf.transform.sensealignments.FrameNetWiktionaryAlignment.java

Source

/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/

package de.tudarmstadt.ukp.lmf.transform.sensealignments;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignmentUtils;

/**
 * Convert FrameNet-Wiktionary alignment to UBY Format
 * @author sh
 * 
 */
public class FrameNetWiktionaryAlignment extends SenseAlignment {
    private String debug;
    public StringBuilder logString;
    public int nullAlignment;
    protected static Log logger = LogFactory.getLog(FrameNetWiktionaryAlignment.class);
    private final SenseAlignmentUtils saUtils;

    public FrameNetWiktionaryAlignment(String sourceUrl, String destUrl, String dbDriver, String dbVendor,
            String alignmentFile, String user, String pass, String UBY_HOME) throws SQLException,
            InstantiationException, IllegalAccessException, ClassNotFoundException, FileNotFoundException {
        super(sourceUrl, destUrl, alignmentFile);
        logString = new StringBuilder();
        nullAlignment = 0;

        // new
        DBConfig s = new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, true);
        DBConfig d = new DBConfig(destUrl, dbDriver, dbVendor, user, pass, true);
        System.out.println("here");
        // temp_Duc is the name of temporary table
        if (sourceUrl.equals(destUrl)) {
            saUtils = new SenseAlignmentUtils(s, s, 0, 0, "temp_Duc", "temp_Duc");
        } else {
            saUtils = new SenseAlignmentUtils(s, d, 0, 0, "temp_Duc", "temp_Duc");
        }
        String decFields1 = "senseId varchar(255) NOT NULL, externalReference varchar(255)";
        String insData1 = "SELECT S.senseId, " + " M.externalReference  "
                + " FROM Sense S JOIN MonolingualExternalRef M" + " ON (S.senseId=M.senseId)"
                + "where substring(S.senseId,1,2)=\"FN\"";
        String insData2 = "SELECT S.senseId, " + " M.externalReference  "
                + " FROM Sense S JOIN MonolingualExternalRef M" + " ON (S.senseId=M.senseId)"
                + "where substring(S.senseId,1,2)=\"Wk\"";
        saUtils.createTempTable(decFields1, insData1, 0);
        saUtils.createTempTable(decFields1, insData2, 1);
    }

    @Override
    public void getAlignment() {
        int lineNumber = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(getAlignmentFileLocation()));
            String line = null;
            while ((line = reader.readLine()) != null) {
                // each component is separated by tab character
                lineNumber++;
                if ((lineNumber != 1) && !line.contains("###")) {
                    String[] lineSplitter = line.split("\t");
                    if (lineSplitter.length >= 2) {
                        String sourceID = lineSplitter[0];
                        String destID = lineSplitter[1];

                        if (!sourceID.equals("null") && !destID.equals("null")) {
                            debug = line;
                            List<Sense> sourceSenses;
                            sourceSenses = saUtils.getSensesByExternalRefID(sourceID, 0, false);
                            List<Sense> senseWKN = saUtils.getSensesByExternalRefID(destID, 1, false);
                            if (sourceSenses.size() != 0 && senseWKN.size() != 0) {
                                for (Sense FNSense : sourceSenses) {
                                    if (senseWKN.get(0).getId().startsWith("WktEN")) {
                                        addSourceSense(FNSense);
                                        addDestSense(senseWKN.get(0));
                                    } else if (senseWKN.size() > 1 && senseWKN.get(1).getId().startsWith("WktEN")) {
                                        addSourceSense(FNSense);
                                        addDestSense(senseWKN.get(1));

                                    } else {
                                        System.out.println("no target sense found: " + line);
                                    }
                                }
                            } else {
                                logString.append(debug);
                                logString.append(LF);
                            }
                        } else {
                            nullAlignment++;
                        }
                    }
                }
            }
            //restore memories
            saUtils.destroyTempTable();
        } catch (Exception ex) {
            IOUtils.closeQuietly(reader);
            ex.printStackTrace();
        }
    }

    /**
     * 
     * @param classifierOut - output tsv of weka classification
     * @param classifierIn - data section of arff input file for classification
     *        (contains ids in same order as classifications in classifierOut)
     * @param tsvFile
     * @throws IOException 
     */
    public static void classifierOutputToTsv(String classifierOut, String classifierIn, String tsvFile)
            throws IOException {
        List<String> res = new ArrayList<String>();
        // read classification
        InputStream is = new BufferedInputStream(new FileInputStream(new File(classifierOut)));
        Reader reader = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(reader);
        String line = br.readLine();
        line = br.readLine();
        ArrayList<String> scoreLines = new ArrayList<String>();
        while (line != null) {
            scoreLines.add(line);
            line = br.readLine();
            System.out.println(scoreLines.size());
        }
        // read ids
        InputStream is2 = new BufferedInputStream(new FileInputStream(new File(classifierIn)));
        Reader reader2 = new InputStreamReader(is2);
        BufferedReader br2 = new BufferedReader(reader2);
        String line2 = br2.readLine();
        line2 = br2.readLine();
        ArrayList<String> idLines = new ArrayList<String>();
        while (line2 != null) {
            idLines.add(line2);
            line2 = br2.readLine();
        }
        System.out.println(scoreLines.size());
        System.out.println(idLines.size());
        if (scoreLines.size() != idLines.size()) {// 
            logger.warn("files do not agree");
        }
        int positive = 0;
        int negative = 0;
        for (int i = 0; i < scoreLines.size(); i++) {
            String[] scoreitems = scoreLines.get(i).split(":");
            String[] iditems = idLines.get(i).split(",");
            String first = iditems[0];
            String second = iditems[1];
            String sysScore = scoreitems[2].split(",")[0];
            if (sysScore.equals("1")) {// pair classified as alignment
                res.add(first + "\t" + second);
                positive++;
            } else {
                negative++;
            }
        }
        logger.info("positive class-->added as alignment: " + positive);
        logger.info("negative class-->no alignment: " + negative);
        System.out.println("write positive class to file");
        FileWriter fw = new FileWriter(new File(tsvFile));
        for (String r : res) {
            fw.write(r + "\n");
        }
        fw.close();
        br2.close();
        br.close();
    }
}