Java tutorial
/**
 * Copyright (c) 2016, adar.w (adar.w@outlook.com)
 *
 * http://www.smoe.me
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package me.smoe.adar.utils.cam.o.statistics;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;

import me.smoe.adar.spring.entity.CAMCases;
import me.smoe.adar.spring.entity.CAMCasesScore;
import me.smoe.adar.spring.repository.CAMCasesRepository;
import me.smoe.adar.spring.repository.CAMCasesScoreRepository;
import me.smoe.adar.utils.cam.o.common.SentenceAnalyzer;
import me.smoe.adar.utils.cam.o.common.CAM.Type;

/**
 * Recomputes the CAM case scores from the stored cases.
 *
 * <p>For every {@link Type}, the cases of that type are split into words, the
 * occurrences of each word (overall and per {@code ca} value) are counted, and
 * a {@link CAMCasesScore} row is saved for every (word, ca) pair whose score
 * exceeds the type's threshold.
 *
 * <p>All counters live in method-local maps that are created per type, so the
 * bean itself holds no mutable state besides its injected repositories.
 */
@Component
@Transactional
public class Statistics {

    /** Number of decimal places kept when dividing match count by word count. */
    private static final int RATIO_SCALE = 4;

    /** Factor that turns the 4-decimal ratio into an integer score. */
    private static final BigDecimal SCORE_FACTOR = new BigDecimal(10000);

    @Autowired
    private CAMCasesRepository camCasesRepository;

    @Autowired
    private CAMCasesScoreRepository camCasesScoreRepository;

    /**
     * Deletes every stored score and rebuilds them from the current cases.
     *
     * @throws Exception if analysing a case fails
     */
    public void run() throws Exception {
        // NOTE(review): nextVersion() is queried after deleteAll(). If the
        // repository derives the version from existing rows, the numbering may
        // restart on every run - confirm against the repository implementation.
        camCasesScoreRepository.deleteAll();

        fetchAndAnalyzer(camCasesScoreRepository.nextVersion());
    }

    /**
     * Processes the cases one type at a time: counts words for all cases of
     * the type, then scores and persists the result.
     *
     * @param version version number stamped on every saved score
     * @throws Exception if analysing a case fails
     */
    private void fetchAndAnalyzer(int version) throws Exception {
        long total = camCasesRepository.count();
        int cur = 0; // progress counter, shared across all types

        for (Type type : Type.values()) {
            // Fresh counters per type so that types never mix their statistics.
            Map<String, Integer> wordCounts = new HashMap<>();
            Map<String, Map<Long, Integer>> wordCaCounts = new HashMap<>();

            for (CAMCases cases : camCasesRepository.findByType(type.getValue())) {
                handleCase(cases, type.isNeedParticiple(), wordCounts, wordCaCounts);

                System.out.println(String.format("[CAM] Fetch: %s Total: %s", ++cur, total));
            }

            System.out.println("[CAM] Analyzer...");
            analyzer(type, version, wordCounts, wordCaCounts);
        }

        System.out.println("[CAM] Succ...");
    }

    /**
     * Adds the words of a single case to the counters.
     *
     * @param cases          the case to count
     * @param needParticiple when {@code true} the case text is split with
     *                       {@link SentenceAnalyzer}; otherwise the whole text
     *                       is treated as one word
     * @param wordCounts     occurrences per word, updated in place
     * @param wordCaCounts   occurrences per word and {@code ca}, updated in place
     * @throws Exception if the sentence analyser fails
     */
    private void handleCase(CAMCases cases, boolean needParticiple,
            Map<String, Integer> wordCounts,
            Map<String, Map<Long, Integer>> wordCaCounts) throws Exception {
        Set<String> words = needParticiple ? SentenceAnalyzer.analyzer(cases.getCases()) : Collections.singleton(cases.getCases());
        Long ca = cases.getCa();

        for (String word : words) {
            increment(wordCounts, word);

            Map<Long, Integer> caCounts = wordCaCounts.get(word);
            if (caCounts == null) {
                caCounts = new HashMap<Long, Integer>();
                wordCaCounts.put(word, caCounts);
            }
            increment(caCounts, ca);
        }
    }

    /**
     * Increases the counter stored under {@code key} by one, starting at 1
     * when the key has not been seen yet.
     */
    private static <K> void increment(Map<K, Integer> counters, K key) {
        Integer previous = counters.get(key);
        counters.put(key, previous == null ? 1 : previous + 1);
    }

    /**
     * Scores every (word, ca) pair of one type and saves those above the
     * type's threshold.
     *
     * @param type         the type whose counters are being evaluated
     * @param version      version number stamped on every saved score
     * @param wordCounts   occurrences per word
     * @param wordCaCounts occurrences per word and {@code ca}
     */
    private void analyzer(Type type, int version,
            Map<String, Integer> wordCounts,
            Map<String, Map<Long, Integer>> wordCaCounts) {
        for (Map.Entry<String, Integer> count : wordCounts.entrySet()) {
            String word = count.getKey();
            int wordCount = count.getValue();

            // Words seen no more often than the type's case limit are ignored.
            if (wordCount <= type.getCases()) {
                continue;
            }

            for (Map.Entry<Long, Integer> caCount : wordCaCounts.get(word).entrySet()) {
                Long ca = caCount.getKey();
                int matchTimes = caCount.getValue();

                int score = calcScore(matchTimes, wordCount);
                if (score > type.getThresholdScore()) {
                    camCasesScoreRepository.save(new CAMCasesScore(type.getValue(), word, ca, score, version));
                }
            }
        }
    }

    /**
     * Returns {@code cur / total} rounded half-up to four decimals and scaled
     * by 10000, e.g. {@code calcScore(1, 3) == 3333}.
     *
     * @param cur   number of matches
     * @param total number of occurrences; must not be zero
     * @return the ratio expressed as an integer out of 10000
     */
    private int calcScore(int cur, int total) {
        return new BigDecimal(cur)
                .divide(new BigDecimal(total), RATIO_SCALE, RoundingMode.HALF_UP)
                .multiply(SCORE_FACTOR)
                .intValue();
    }
}