com.paolodragone.wsn.util.Senses.java Source code

Java tutorial

Introduction

Here is the source code for com.paolodragone.wsn.util.Senses.java

Source

/*
 * Copyright Paolo Dragone 2014
 *
 * This file is part of WiktionarySemanticNetwork.
 *
 * WiktionarySemanticNetwork is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * WiktionarySemanticNetwork is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WiktionarySemanticNetwork.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.paolodragone.wsn.util;

import com.google.common.collect.*;
import com.paolodragone.util.nlp.Language;
import com.paolodragone.util.nlp.POS;
import com.paolodragone.wsn.entities.Sense;

import java.util.*;
import java.util.stream.Stream;

/**
 * Utility class for processing senses.
 */
public class Senses {

    /**
     * Creates a new sense and populates it with the given values.
     *
     * @param word           The word the sense belongs to
     * @param language       The language of the sense
     * @param POS            The part of speech of the sense
     * @param number         The number of the sense
     * @param gloss          The gloss of the sense
     * @param lexicalContext The lexical context of the sense
     * @return A newly created sense whose fields are set through its setters
     */
    public static Sense newSense(String word, Language language, POS POS, int number, String gloss,
            String lexicalContext) {
        Sense sense = new Sense();
        sense.setWord(word);
        sense.setLanguage(language);
        sense.setPos(POS);
        sense.setNumber(number);
        sense.setGloss(gloss);
        sense.setLexicalContext(lexicalContext);
        return sense;
    }

    /**
     * Compute the unique id of a sense.
     * <p>
     * The unique id of a sense is the string {@code word#language#pos#number}, where
     * {@code language} is the name of the sense's language and {@code pos} is the name of
     * its POS constant. The sense, its language and its POS must not be {@code null}.
     *
     * @param sense A sense
     * @return A String containing the unique id of the input sense
     */
    public static String getUId(Sense sense) {
        String word = sense.getWord();
        String lang = sense.getLanguage().getName();
        String pos = sense.getPos().name();
        int num = sense.getNumber();
        return word + "#" + lang + "#" + pos + "#" + num;
    }

    /**
     * Builds a map from the unique id of each sense (see {@link #getUId(Sense)}) to its id.
     * <p>
     * The map is backed by a {@link BiMap}, so ids must be unique across unique ids.
     *
     * @param senses A collection of senses
     * @return A map from unique id to sense id
     * @throws IllegalArgumentException if two senses with different unique ids share the
     *                                  same id (enforced by the underlying bimap)
     */
    public static Map<String, Integer> buildSenseUIdMap(Collection<Sense> senses) {
        BiMap<String, Integer> senseUIdMap = HashBiMap.create();
        for (Sense sense : senses) {
            int id = sense.getId();
            senseUIdMap.put(getUId(sense), id);
        }
        return senseUIdMap;
    }

    /**
     * Builds a map from sense id to sense.
     * <p>
     * If several senses share the same id, the last one in iteration order wins.
     *
     * @param senses A collection of senses
     * @return A mutable map from id to sense
     */
    public static Map<Integer, Sense> buildSenseMap(Collection<Sense> senses) {
        Map<Integer, Sense> senseMap = new HashMap<>();
        for (Sense sense : senses) {
            senseMap.put(sense.getId(), sense);
        }
        return senseMap;
    }

    /**
     * Groups senses by their lower-cased word; each group is sorted with
     * {@link #sortSenseList(List)}.
     * <p>
     * Keys are lower-cased with {@link Locale#ROOT} so that the resulting keys do not
     * depend on the default locale of the JVM (e.g. the Turkish dotless i). Callers
     * should normalize the words they look up in the same way.
     *
     * @param senses A collection of senses
     * @return A multimap from lower-cased word to the sorted list of its senses
     */
    public static ListMultimap<String, Sense> buildWordSensesMap(Collection<Sense> senses) {
        Multimap<String, Sense> wordSensesMap = ArrayListMultimap.create();
        ListMultimap<String, Sense> wordSensesSortedMap = ArrayListMultimap.create();

        for (Sense sense : senses) {
            // Locale.ROOT keeps the key independent of the platform default locale.
            wordSensesMap.put(sense.getWord().toLowerCase(Locale.ROOT), sense);
        }

        for (String word : wordSensesMap.keySet()) {
            // Sort a copy so the grouping multimap is not modified while iterating its keys.
            List<Sense> senseList = new ArrayList<>(wordSensesMap.get(word));
            sortSenseList(senseList);
            wordSensesSortedMap.putAll(word, senseList);
        }

        return wordSensesSortedMap;
    }

    /**
     * Sorts a list of senses belonging to the same word and language.
     * <p>
     * The list is sorted in place, according primarily to POS and secondarily to number.
     * The POS order is the one given by {@code POS.compare} (see the
     * {@link com.paolodragone.util.nlp.POS} enumeration).
     *
     * @param senseList A list of senses
     */
    public static void sortSenseList(List<Sense> senseList) {
        senseList.sort((sense1, sense2) -> {
            int posCompare = POS.compare(sense1.getPos(), sense2.getPos());
            if (posCompare != 0) {
                return posCompare;
            }
            return Integer.compare(sense1.getNumber(), sense2.getNumber());
        });
    }

    /**
     * Tells whether a sense is valid, i.e. whether its lexical context is accepted by
     * {@code LexicalContexts.isValidLexicalContext}.
     *
     * @param sense A sense
     * @return {@code true} if the lexical context of the sense is valid
     */
    public static boolean isValidSense(Sense sense) {
        return LexicalContexts.isValidLexicalContext(sense.getLexicalContext());
    }

    /**
     * Keeps only the senses of the stream that satisfy {@link #isValidSense(Sense)}.
     *
     * @param senses A stream of senses
     * @return The filtered stream
     */
    public static Stream<Sense> filterValidSenses(Stream<Sense> senses) {
        return senses.filter(Senses::isValidSense);
    }

    //    public static Stream<Sense> filterValidTermsFromSenses (Stream<Sense> senses) {
    //        return senses.map(Senses::filterValidTermsFromSense);
    //    }

    //    public static Sense filterValidTermsFromSense (Sense sense) {
    //        Terms.filterValidTerms(sense.getGlossTerms());
    //        return sense;
    //    }

    /**
     * Sets the language of every given sense to {@code Language.EN}, modifying the
     * senses in place.
     *
     * @param senses A collection of senses
     */
    public static void adjustEnglishLanguage(Collection<Sense> senses) {
        for (Sense sense : senses) {
            sense.setLanguage(Language.EN);
        }
    }

    /** Not instantiable: this class only exposes static utilities. */
    private Senses() {
    }
}