de.tudarmstadt.ukp.dkpro.wsd.si.dictionary.util.GoogleDictionary.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.dkpro.wsd.si.dictionary.util.GoogleDictionary.java

Source

/*******************************************************************************
 * Copyright 2015
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitt Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/

package de.tudarmstadt.ukp.dkpro.wsd.si.dictionary.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.io.FileUtils;

/**
 * The Google Sense inventory (Valentin I. Spitkovsky and Angel X. Chang. 2012.
 * A Cross-Lingual Dictionary for English Wikipedia Concepts.
 * In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2012).)
 *
 * @author Nicolai Erbs <erbs@ukp.informatik.tu-darmstadt.de>
 */
public class GoogleDictionary extends AbstractDictionary {

    /**
     *
     */
    private static final long serialVersionUID = 1L;

    public GoogleDictionary(String path, String neededMentionsPath) throws FileNotFoundException, IOException {

        HashSet<String> neededMentions = new HashSet<String>(FileUtils.readLines(new File(neededMentionsPath)));

        mentionMap = new HashMap<String, List<String[]>>();
        targetMap = new HashMap<String, Integer>();

        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new BZip2CompressorInputStream(new FileInputStream(path))));
        String line;
        String[] lineArray;
        String anchor;
        String target;
        String score;
        boolean inKB;
        boolean isTranslation;
        //      Timer timer = new Timer(297073139);
        while ((line = reader.readLine()) != null) {
            lineArray = line.split("\t");

            anchor = lineArray[0];
            if (neededMentions.contains(anchor)) {
                anchor = anchor.replaceAll("\"", "").trim().replaceAll(" ", "_").toLowerCase();
                score = lineArray[1].split(" ")[0].trim();
                target = lineArray[1].split(" ")[1].trim();

                inKB = false;
                isTranslation = false;
                for (String flag : lineArray[1].substring(lineArray[1].indexOf(target) + target.length())
                        .split(" ")) {
                    if (flag.equals("KB")) {
                        inKB = true;
                    }
                    if (flag.equals("x")) {
                        isTranslation = true;
                    }
                }
                //            if(inKB && !isTranslation)
                if (!isTranslation) {
                    //add target to targetMap
                    if (targetMap.containsKey(target)) {
                        targetMap.put(target, targetMap.get(target) + 1);
                    } else {
                        targetMap.put(target, 1);
                    }

                    //add targets to mentionMap
                    if (!mentionMap.containsKey(anchor)) {
                        mentionMap.put(anchor, new LinkedList<String[]>());
                    }
                    mentionMap.get(anchor).add(new String[] { target, score });
                }

            }
        }

        reader.close();
    }

    @Override
    public boolean containsTarget(String senseId) {
        return targetMap.containsKey(senseId);
    }
}