Java tutorial
/** * * APDPlat - Application Product Development Platform * Copyright (c) 2013, ??, yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.superword.tools; import org.apache.commons.lang.StringUtils; import org.apdplat.superword.model.Word; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.URL; import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; /** * ??? * ????? * @author ?? */ public class WordSources { private WordSources() { } private static final Logger LOGGER = LoggerFactory.getLogger(WordSources.class); /** * ? * @return */ public static Set<Word> getSyllabusVocabulary() { return get("/word_primary_school.txt", "/word_junior_school.txt", "/word_senior_school.txt", "/word_university.txt", "/word_new_conception.txt", "/word_ADULT.txt", "/word_CET4.txt", "/word_CET6.txt", "/word_TEM4.txt", "/word_TEM8.txt", "/word_CATTI.txt", "/word_GMAT.txt", "/word_GRE.txt", "/word_SAT.txt", "/word_BEC.txt", "/word_MBA.txt", "/word_IELTS.txt", "/word_TOEFL.txt", "/word_TOEIC.txt", "/word_KY.txt"); } public static Set<Word> getAll() { Set<Word> data = get("/words.txt", "/word_computer.txt"); data.addAll(getSyllabusVocabulary()); return data; } /** * * ????? * index 1 * @param files ??/ * @return ?????? */ public static Set<Word> get(String... files) { return get(1, files); } public static Map<Word, AtomicInteger> convert(Map<String, AtomicInteger> words) { Map<Word, AtomicInteger> result = new HashMap<>(); words.keySet().forEach(w -> result.put(new Word(w, ""), words.get(w))); return result; } public static boolean isEnglish(String string) { for (char c : string.toLowerCase().toCharArray()) { if (c < 'a' || c > 'z') { return false; } } return true; } /** * * @param first * @param second * @return */ public static Set<Word> intersection(Set<Word> first, Set<Word> second) { LOGGER.info("?1" + first.size()); LOGGER.info("?2" + second.size()); Set<Word> result = first.stream().filter(w -> second.contains(w)).collect(Collectors.toSet()); LOGGER.info("?" + result.size()); return result; } public static Set<Word> minus(Set<Word> minuend, Set<Word> subtrahend) { LOGGER.info("?" + minuend.size()); LOGGER.info("?" + subtrahend.size()); Set<Word> result = minuend.stream().filter(word -> !subtrahend.contains(word)).collect(Collectors.toSet()); LOGGER.info("" + result.size()); return result; } public static void save(Set<Word> words, String path) { try { path = "src/main/resources" + path; LOGGER.info("??" + path); AtomicInteger i = new AtomicInteger(); List<String> list = words.stream().sorted().map(word -> i.incrementAndGet() + "\t" + word.getWord()) .collect(Collectors.toList()); Files.write(Paths.get(path), list); LOGGER.info("??"); } catch (Exception e) { LOGGER.error("??", e); } } /** * ????? * @param index ???0 * @param files ??/ * @return ?????? */ public static Set<Word> get(int index, String... files) { Set<Word> set = new HashSet<>(); for (String file : files) { URL url = null; if (file.startsWith("/")) { url = WordSources.class.getResource(file); } else { try { url = Paths.get(file).toUri().toURL(); } catch (Exception e) { LOGGER.error("URL", e); } } if (url == null) { LOGGER.error("??" + file); continue; } System.out.println("parse word file: " + url); List<String> words = getExistWords(url); Set<Word> wordSet = words.parallelStream() .filter(line -> !line.trim().startsWith("#") && !"".equals(line.trim())) .filter(line -> line.trim().split("\\s+").length >= index + 1) .map(line -> new Word(line.trim().split("\\s+")[index], "")) .filter(word -> StringUtils.isAlphanumeric(word.getWord())).collect(Collectors.toSet()); set.addAll(wordSet); } System.out.println("unique words count: " + set.size()); return set; } private static List<String> getExistWords(URL url) { try { return Files.readAllLines(Paths.get(url.toURI())); } catch (Exception e) { return Collections.emptyList(); } } public static Set<Word> stem(Set<Word> words) { return words.stream().filter(word -> word.getWord().length() > 3).filter(word -> !isPlural(words, word)) .collect(Collectors.toSet()); } public static Map<String, String> plural(Set<Word> words) { Map<String, String> data = new HashMap<>(); words.stream().filter(word -> word.getWord().length() > 3).forEach(word -> { isPlural(words, word, data); }); return data; } public static boolean isPlural(Set<Word> words, Word word) { return isPlural(words, word, new HashMap<>()); } public static boolean isPlural(Set<Word> words, Word word, Map<String, String> data) { String w = word.getWord(); //1??+y,?yi?es if (w.endsWith("ies")) { char c = w.charAt(w.length() - 4); if (!(isVowel(c)) && words.contains(new Word(w.substring(0, w.length() - 4) + "y", ""))) { log(w, "ies"); data.put(w, "ies"); return true; } } //2?ce, se, ze, s if (w.endsWith("ces") || w.endsWith("ses") || w.endsWith("zes")) { if (words.contains(new Word(w.substring(0, w.length() - 1), ""))) { log(w, "s"); data.put(w, "s"); return true; } } //3?s, sh, ch, x, es if (w.endsWith("ses") || w.endsWith("shes") || w.endsWith("ches") || w.endsWith("xes")) { if (words.contains(new Word(w.substring(0, w.length() - 2), ""))) { log(w, "es"); data.put(w, "es"); return true; } } //4?s if (w.endsWith("s")) { if (words.contains(new Word(w.substring(0, w.length() - 1), ""))) { log(w, "s"); data.put(w, "s"); return true; } } return false; } private static void log(String word, String suffix) { LOGGER.debug("??" + word + "\t" + suffix); } public static boolean isVowel(char _char) { switch (_char) { case 'a': return true; case 'e': return true; case 'i': return true; case 'o': return true; case 'u': return true; } return false; } public static void main(String[] args) { //AtomicInteger i = new AtomicInteger(); //stem(getSyllabusVocabulary()).forEach(w -> System.out.println(i.incrementAndGet() + "?" + w.getWord())); String html = HtmlFormatter.toHtmlForPluralFormat(plural(getSyllabusVocabulary())); System.out.println(html); } }