Java tutorial
/*
 * Copyright Paolo Dragone 2014
 *
 * This file is part of WiktionarySemanticNetwork.
 *
 * WiktionarySemanticNetwork is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * WiktionarySemanticNetwork is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WiktionarySemanticNetwork. If not, see <http://www.gnu.org/licenses/>.
 */

package com.paolodragone.wsn.util;

import com.google.common.collect.*;
import com.paolodragone.util.nlp.Language;
import com.paolodragone.util.nlp.POS;
import com.paolodragone.wsn.entities.Sense;

import java.util.*;
import java.util.stream.Stream;

/**
 * Static helpers for creating, identifying, indexing, sorting and filtering
 * {@link Sense} objects. The class only exposes static methods and cannot be
 * instantiated.
 */
public class Senses {

    /**
     * Creates a new {@link Sense} and populates it through its setters.
     *
     * @param word           value passed to {@code setWord}
     * @param language       value passed to {@code setLanguage}
     * @param POS            value passed to {@code setPos}
     * @param number         value passed to {@code setNumber}
     * @param gloss          value passed to {@code setGloss}
     * @param lexicalContext value passed to {@code setLexicalContext}
     * @return the newly created sense
     */
    public static Sense newSense(String word, Language language, POS POS, int number,
                                 String gloss, String lexicalContext) {
        Sense created = new Sense();
        created.setWord(word);
        created.setLanguage(language);
        created.setPos(POS);
        created.setNumber(number);
        created.setGloss(gloss);
        created.setLexicalContext(lexicalContext);
        return created;
    }

    /**
     * Computes the unique id of a sense.
     * <p>
     * The unique id is the string {@code word#language#pos#number}, where
     * {@code language} is the language name and {@code pos} is the name of
     * the POS constant.
     *
     * @param sense A sense
     * @return A String containing the unique id of the input sense
     */
    public static String getUId(Sense sense) {
        String word = sense.getWord();
        String languageName = sense.getLanguage().getName();
        String posName = sense.getPos().name();
        int number = sense.getNumber();
        return String.join("#", word, languageName, posName, Integer.toString(number));
    }

    /**
     * Builds a map from the unique id of each sense (see {@link #getUId(Sense)})
     * to the id of that sense.
     * <p>
     * The returned map is a Guava {@code HashBiMap}.
     * NOTE(review): per the Guava BiMap contract, {@code put} rejects a value
     * that is already bound to a different key, so two senses sharing an id
     * but not a unique id would make this method throw - confirm ids are unique.
     *
     * @param senses The senses to index
     * @return A map from unique id to sense id
     */
    public static Map<String, Integer> buildSenseUIdMap(Collection<Sense> senses) {
        BiMap<String, Integer> idsByUId = HashBiMap.create();
        senses.forEach(sense -> {
            // Read the id before computing the unique id, as a plain int.
            int senseId = sense.getId();
            idsByUId.put(getUId(sense), senseId);
        });
        return idsByUId;
    }

    /**
     * Builds a map from sense id to sense. If several senses share an id, the
     * last one encountered wins.
     *
     * @param senses The senses to index
     * @return A {@link HashMap} from sense id to sense
     */
    public static Map<Integer, Sense> buildSenseMap(Collection<Sense> senses) {
        Map<Integer, Sense> sensesById = new HashMap<>();
        senses.forEach(sense -> sensesById.put(sense.getId(), sense));
        return sensesById;
    }

    /**
     * Groups senses by their lower-cased word. The senses of each word are
     * ordered with {@link #sortSenseList(List)}.
     * <p>
     * NOTE(review): the key is produced with the no-argument
     * {@code String.toLowerCase()}, which depends on the JVM default locale;
     * lookups must lower-case the same way.
     *
     * @param senses The senses to group
     * @return A multimap from lower-cased word to its sorted senses
     */
    public static ListMultimap<String, Sense> buildWordSensesMap(Collection<Sense> senses) {
        // First pass: bucket every sense under its lower-cased word.
        Multimap<String, Sense> grouped = ArrayListMultimap.create();
        senses.forEach(sense -> grouped.put(sense.getWord().toLowerCase(), sense));

        // Second pass: sort a copy of each bucket and store it in the result.
        ListMultimap<String, Sense> result = ArrayListMultimap.create();
        for (Map.Entry<String, Collection<Sense>> bucket : grouped.asMap().entrySet()) {
            List<Sense> ordered = new ArrayList<>(bucket.getValue());
            sortSenseList(ordered);
            result.putAll(bucket.getKey(), ordered);
        }
        return result;
    }

    /**
     * Sorts, in place, a list of senses belonging to the same word and language.
     * <p>
     * Senses are ordered primarily by POS, as decided by {@code POS.compare},
     * and secondarily by ascending number.
     *
     * @param senseList A list of senses
     */
    public static void sortSenseList(List<Sense> senseList) {
        Comparator<Sense> byPos =
                (left, right) -> POS.compare(left.getPos(), right.getPos());
        senseList.sort(byPos.thenComparingInt(Sense::getNumber));
    }

    /**
     * Tells whether a sense is valid, i.e. whether its lexical context is
     * accepted by {@code LexicalContexts.isValidLexicalContext}.
     *
     * @param sense A sense
     * @return {@code true} if the lexical context of the sense is valid
     */
    public static boolean isValidSense(Sense sense) {
        String lexicalContext = sense.getLexicalContext();
        return LexicalContexts.isValidLexicalContext(lexicalContext);
    }

    /**
     * Keeps only the senses for which {@link #isValidSense(Sense)} holds.
     *
     * @param senses A stream of senses
     * @return The filtered stream
     */
    public static Stream<Sense> filterValidSenses(Stream<Sense> senses) {
        return senses.filter(sense -> isValidSense(sense));
    }

    // Disabled code, kept as found:
    //
    // public static Stream<Sense> filterValidTermsFromSenses (Stream<Sense> senses) {
    //     return senses.map(Senses::filterValidTermsFromSense);
    // }
    //
    // public static Sense filterValidTermsFromSense (Sense sense) {
    //     Terms.filterValidTerms(sense.getGlossTerms());
    //     return sense;
    // }

    /**
     * Sets the language of every given sense to {@code Language.EN}.
     *
     * @param senses The senses to update in place
     */
    public static void adjustEnglishLanguage(Collection<Sense> senses) {
        senses.forEach(sense -> sense.setLanguage(Language.EN));
    }

    /** Not instantiable: this class only holds static helpers. */
    private Senses() {
    }
}