// Java tutorial
/** * * APDPlat - Application Product Development Platform Copyright (c) 2013, ??, * yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.superword.tools; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.nio.file.Paths; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; /** * ?? * * @author ?? */ public class SentenceScorer { private SentenceScorer() { } private static final Logger LOGGER = LoggerFactory.getLogger(SentenceScorer.class); public static TreeMap<Float, Map<String, List<String>>> score(String path) { return score(path, Integer.MAX_VALUE); } public static TreeMap<Float, Map<String, List<String>>> score(String path, int limit) { //? Set<String> fileNames = TextAnalyzer.getFileNames(path); //? Map<String, AtomicInteger> frequency = TextAnalyzer.frequency(fileNames); //? TreeMap<Float, Map<String, List<String>>> sentences = new TreeMap<>(); //?????? Set<Integer> hashes = new HashSet<>(); Set<String> repeat = new HashSet<>(); //????????? 
int count = 0; for (String fileName : fileNames) { try (BufferedReader reader = new BufferedReader( new InputStreamReader(new BufferedInputStream(new FileInputStream(fileName))))) { String book = Paths.get(fileName).toFile().getName().replace(".txt", ""); String line = null; while ((line = reader.readLine()) != null) { if (StringUtils.isBlank(line)) { continue; } int hc = line.hashCode(); if (hashes.contains(hc)) { repeat.add(line); continue; } hashes.add(hc); // float score = score(line, frequency); if (score > 0) { if (count >= limit) { LOGGER.debug("?????" + limit + "?"); return sentences; } count++; sentences.putIfAbsent(score, new HashMap<>()); sentences.get(score).putIfAbsent(book, new ArrayList<>()); sentences.get(score).get(book).add(line); } } } catch (IOException ex) { ex.printStackTrace(); } } LOGGER.debug("????" + repeat.size()); AtomicInteger i = new AtomicInteger(); repeat.forEach(r -> { LOGGER.debug("\t" + i.incrementAndGet() + "?" + r); }); LOGGER.debug("???" + count); return sentences; } public static void toTextFile(TreeMap<Float, Map<String, List<String>>> scores, String fileName) { LOGGER.debug("" + fileName); AtomicInteger bookCount = new AtomicInteger(); AtomicInteger sentenceCount = new AtomicInteger(); try (BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(fileName))))) { AtomicInteger i = new AtomicInteger(); scores.entrySet().forEach(score -> { writeLine(writer, "score_(" + i.incrementAndGet() + "/" + scores.size() + ")" + "" + score.getKey()); Map<String, List<String>> books = score.getValue(); AtomicInteger j = new AtomicInteger(); books.entrySet().forEach(book -> { writeLine(writer, "\tbook_(" + j.incrementAndGet() + "/" + books.size() + ")" + "" + book.getKey()); bookCount.incrementAndGet(); AtomicInteger k = new AtomicInteger(); book.getValue().forEach(sentence -> { writeLine(writer, "\t\tsentence_(" + k.incrementAndGet() + "/" + book.getValue().size() + ")" + "" + 
sentence); sentenceCount.incrementAndGet(); }); }); }); writeLine(writer, "??" + sentenceCount.get()); } catch (IOException e) { LOGGER.error(e.getMessage(), e); } LOGGER.debug("" + scores.keySet().size()); LOGGER.debug("??" + sentenceCount.get()); LOGGER.debug("?"); } private static void writeLine(BufferedWriter writer, String text) { try { writer.write(text + "\n"); } catch (IOException e) { LOGGER.error(e.getMessage(), e); } } public static float score(String sentence, Map<String, AtomicInteger> frequency) { //int maxFrequency = frequency.values().parallelStream().max((a,b) -> a.get()-b.get()).get().get(); //LOGGER.debug("?"+maxFrequency); // //isDebugEnabled??SO...YOU GOT IT? if (LOGGER.isDebugEnabled()) { LOGGER.debug("??" + sentence); } float score = 0; List<String> words = TextAnalyzer.seg(sentence); if (LOGGER.isDebugEnabled()) { LOGGER.debug("?" + words); } for (String word : words) { AtomicInteger fre = frequency.get(word); if (fre == null || fre.get() == 0) { LOGGER.error("?" + word + "??"); continue; } int f = fre.get(); float s = 1 / (float) f; if (LOGGER.isDebugEnabled()) { LOGGER.debug("?" + word + "?" + f + "?" + s); } score += s; } words.clear(); score = Math.round(score * 100) / (float) 100; LOGGER.debug("" + score); return score; } public static void main(String[] args) { TreeMap<Float, Map<String, List<String>>> scores = score("src/main/resources/it"); toTextFile(scores, "target/sentence_score_rank.txt"); } }