me.smoe.adar.utils.cam.o.statistics.Statistics.java Source code

Java tutorial

Introduction

Here is the source code for me.smoe.adar.utils.cam.o.statistics.Statistics.java

Source

/**
 * Copyright (c) 2016, adar.w (adar.w@outlook.com) 
 * 
 * http://www.smoe.me
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package me.smoe.adar.utils.cam.o.statistics;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;

import me.smoe.adar.spring.entity.CAMCases;
import me.smoe.adar.spring.entity.CAMCasesScore;
import me.smoe.adar.spring.repository.CAMCasesRepository;
import me.smoe.adar.spring.repository.CAMCasesScoreRepository;
import me.smoe.adar.utils.cam.o.common.SentenceAnalyzer;
import me.smoe.adar.utils.cam.o.common.CAM.Type;

/**
 * Rebuilds the stored {@link CAMCasesScore} rows from the {@link CAMCases} table.
 *
 * <p>For every {@link Type}, the cases of that type are split into words, the
 * number of cases each word occurs in is tallied (overall and per {@code ca}
 * value), and a score is saved for each (word, ca) pair that passes the
 * type's thresholds.
 *
 * <p>All tallies are kept in local variables scoped to a single type, so no
 * state is shared between invocations or retained after {@link #run()} returns.
 */
@Component
@Transactional
public class Statistics {

    @Autowired
    private CAMCasesRepository camCasesRepository;

    @Autowired
    private CAMCasesScoreRepository camCasesScoreRepository;

    /**
     * Deletes every existing score row, then recomputes and saves the scores
     * under the version returned by {@code camCasesScoreRepository.nextVersion()}.
     *
     * @throws Exception if analysing a case fails
     */
    public void run() throws Exception {
        camCasesScoreRepository.deleteAll();

        // nextVersion() is intentionally queried only after the delete, as before.
        fetchAndAnalyzer(camCasesScoreRepository.nextVersion());
    }

    /**
     * Processes the types one after another: tallies the words of every case
     * of the type, then scores and saves the result for that type.
     *
     * @param version version number stamped on every saved score row
     * @throws Exception if analysing a case fails
     */
    private void fetchAndAnalyzer(int version) throws Exception {
        long total = camCasesRepository.count();
        int cur = 0;
        for (Type type : Type.values()) {
            // Fresh tallies per type; they become garbage once the type is done.
            Map<String, Integer> wordCount = new HashMap<>();
            Map<String, Map<Long, Integer>> wordCaCount = new HashMap<>();

            for (CAMCases cases : camCasesRepository.findByType(type.getValue())) {
                handleCase(cases, type.isNeedParticiple(), wordCount, wordCaCount);

                System.out.println(String.format("[CAM] Fetch: %s Total: %s", ++cur, total));
            }

            System.out.println("[CAM] Analyzer...");
            analyzer(type, version, wordCount, wordCaCount);
        }

        System.out.println("[CAM] Succ...");
    }

    /**
     * Adds one case to the tallies.
     *
     * <p>The words are held in a {@link Set}, so a word is counted at most
     * once per case.
     *
     * @param cases          the case to tally
     * @param needParticiple when {@code true} the case text is split with
     *                       {@code SentenceAnalyzer.analyzer}; otherwise the
     *                       whole text is treated as a single word
     * @param wordCount      word -> number of cases containing it (updated)
     * @param wordCaCount    word -> (ca -> number of cases with that ca
     *                       containing the word) (updated)
     * @throws Exception if the sentence analyzer fails
     */
    private void handleCase(CAMCases cases, boolean needParticiple, Map<String, Integer> wordCount,
            Map<String, Map<Long, Integer>> wordCaCount) throws Exception {
        Set<String> words = needParticiple ? SentenceAnalyzer.analyzer(cases.getCases())
                : Collections.singleton(cases.getCases());
        for (String word : words) {
            increment(wordCount, word);

            Map<Long, Integer> caCount = wordCaCount.get(word);
            if (caCount == null) {
                caCount = new HashMap<>();
                wordCaCount.put(word, caCount);
            }
            Long ca = cases.getCa();
            increment(caCount, ca);
        }
    }

    /**
     * Scores every (word, ca) pair of one type and saves those that qualify.
     *
     * <p>A word is skipped unless its total count is strictly greater than
     * {@code type.getCases()}; a pair is saved only when its score is strictly
     * greater than {@code type.getThresholdScore()}.
     *
     * @param type        the type whose tallies are being scored
     * @param version     version number stamped on every saved score row
     * @param wordCount   word -> number of cases containing it
     * @param wordCaCount word -> (ca -> number of matching cases)
     */
    private void analyzer(Type type, int version, Map<String, Integer> wordCount,
            Map<String, Map<Long, Integer>> wordCaCount) {
        for (Map.Entry<String, Integer> count : wordCount.entrySet()) {
            String word = count.getKey();
            Integer occurrences = count.getValue();

            if (occurrences <= type.getCases()) {
                continue;
            }

            // handleCase always fills both maps for a word, so this is never null.
            Map<Long, Integer> caCount = wordCaCount.get(word);
            for (Map.Entry<Long, Integer> entry : caCount.entrySet()) {
                Long ca = entry.getKey();
                Integer matchTimes = entry.getValue();

                int score = calcScore(matchTimes, occurrences);
                if (score > type.getThresholdScore()) {
                    camCasesScoreRepository.save(new CAMCasesScore(type.getValue(), word, ca, score, version));
                }
            }
        }
    }

    /**
     * Returns {@code cur / total} as an integer on a 0..10000 scale: the ratio
     * is rounded half-up to four decimal places and then multiplied by 10000.
     *
     * @param cur   numerator
     * @param total denominator; must not be zero
     * @return the scaled ratio
     */
    private static int calcScore(int cur, int total) {
        return new BigDecimal(cur).divide(new BigDecimal(total), 4, RoundingMode.HALF_UP)
                .multiply(new BigDecimal(10000)).intValue();
    }

    /** Increments the counter stored under {@code key}, starting at 1 when absent. */
    private static <K> void increment(Map<K, Integer> counts, K key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }
}