me.smoe.adar.utils.cam.o.mapping.CAMatcher.java Source code

Java tutorial

Introduction

Here is the source code for me.smoe.adar.utils.cam.o.mapping.CAMatcher.java

Source

/**
 * Copyright (c) 2016, adar.w (adar.w@outlook.com) 
 * 
 * http://www.smoe.me
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package me.smoe.adar.utils.cam.o.mapping;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import me.smoe.adar.spring.entity.CAMCasesScore;
import me.smoe.adar.spring.repository.CAMCasesScoreRepository;
import me.smoe.adar.utils.cam.o.common.SentenceAnalyzer;
import me.smoe.adar.utils.cam.o.common.CAM.Type;

/**
 * Picks the best-matching CA id for an item described by its name, brand,
 * category and free-text description.
 *
 * <p>Each field is turned into one or more lookup words (the field is split
 * by {@link SentenceAnalyzer} when its {@link Type} asks for it, otherwise it
 * is used as a single word). The stored {@link CAMCasesScore} rows for every
 * word are folded into one score per CA id and field, the per-field scores
 * are weighted with {@code Type.getPower()} and summed, and the CA id with
 * the highest total wins.
 *
 * <p>Repository results are memoised in a static, process-wide cache that is
 * never evicted or invalidated by this class.
 */
@Component
public class CAMatcher {

    /** Strictest of the predefined acceptance thresholds. */
    public static final int THRESHOLD_H = 10000;

    /** Medium acceptance threshold; the default used by {@link #matcher(String, String, String, String)}. */
    public static final int THRESHOLD_M = 7000;

    /** Most lenient of the predefined acceptance thresholds. */
    public static final int THRESHOLD_L = 5000;

    /**
     * Cache of repository look-ups, keyed by {@code type + "#" + word}.
     *
     * <p>The map is static and therefore shared by every caller of this bean,
     * so it is wrapped in a synchronized view: a plain {@link HashMap} must not
     * be mutated from several threads at once. Two threads may still both miss
     * and query the repository for the same key; that only costs a duplicate
     * query. NOTE(review): the cache is unbounded and never cleared here.
     */
    private static final Map<String, List<CAMCasesScore>> SCORE_CACHE = Collections
            .synchronizedMap(new HashMap<String, List<CAMCasesScore>>());

    @Autowired
    private CAMCasesScoreRepository camCasesScoreRepository;

    /**
     * Matches with the default threshold {@link #THRESHOLD_M}.
     *
     * @param name     item name, may be {@code null}
     * @param brand    item brand, may be {@code null}
     * @param category item category, may be {@code null}
     * @param desc     item description, may be {@code null}
     * @return the best CA id, or {@code null} when no candidate scores above the threshold
     * @throws Exception propagated from sentence analysis or the repository
     */
    public Long matcher(String name, String brand, String category, String desc) throws Exception {
        return matcher(name, brand, category, desc, THRESHOLD_M);
    }

    /**
     * Matches the given fields and accepts the best candidate only if its
     * total score is strictly greater than {@code threshold}.
     *
     * @param name      item name, may be {@code null}
     * @param brand     item brand, may be {@code null}
     * @param category  item category, may be {@code null}
     * @param desc      item description, may be {@code null}
     * @param threshold minimum (exclusive) total score required to accept a match
     * @return the best CA id, or {@code null} when there is no candidate or its
     *         score does not exceed {@code threshold}
     * @throws Exception propagated from sentence analysis or the repository
     */
    public Long matcher(String name, String brand, String category, String desc, int threshold) throws Exception {
        Long[] best = matcherCA(name, brand, category, desc);
        if (best == null) {
            return null;
        }

        return best[1] > threshold ? best[0] : null;
    }

    /**
     * Scores every candidate CA id across all four fields and returns the winner.
     *
     * @return a two-element array {@code {caId, totalScore}}, or {@code null}
     *         when no candidate has a total score greater than zero
     */
    private Long[] matcherCA(String name, String brand, String category, String desc) throws Exception {
        Map<Long, Integer> nameScores = matcherCA(name, Type.NAME, Type.NAME.isNeedParticiple());
        Map<Long, Integer> brandScores = matcherCA(brand, Type.BRAND, Type.BRAND.isNeedParticiple());
        Map<Long, Integer> categoryScores = matcherCA(category, Type.CATEGORY,
                Type.CATEGORY.isNeedParticiple());
        Map<Long, Integer> descScores = matcherCA(desc, Type.DESC, Type.DESC.isNeedParticiple());

        Set<Long> candidates = distinctCAs(nameScores.keySet(), brandScores.keySet(),
                categoryScores.keySet(), descScores.keySet());

        Long bestCa = null;
        // Primitive accumulators: avoids boxing a new Long on every addition.
        long bestScore = 0L;
        for (Long ca : candidates) {
            long score = 0L;

            score += calcMatchScore(ca, nameScores, Type.NAME.getPower());
            score += calcMatchScore(ca, brandScores, Type.BRAND.getPower());
            score += calcMatchScore(ca, categoryScores, Type.CATEGORY.getPower());
            score += calcMatchScore(ca, descScores, Type.DESC.getPower());

            // Strict comparison: a candidate whose total is 0 is never selected.
            if (score > bestScore) {
                bestCa = ca;
                bestScore = score;
            }
        }

        return bestCa == null ? null : new Long[] { bestCa, bestScore };
    }

    /**
     * Returns the weighted score of {@code ca} in one field.
     *
     * @param ca     candidate CA id
     * @param scores per-CA scores of a single field
     * @param power  weight applied to that field
     * @return {@code score * power} truncated to an int, or 0 when the field has no score for {@code ca}
     */
    private static int calcMatchScore(Long ca, Map<Long, Integer> scores, double power) {
        Integer raw = scores.get(ca);
        return raw == null ? 0 : (int) (raw * power);
    }

    /**
     * Builds the per-CA score map for a single field.
     *
     * @param cases          the field text; {@code null} yields an empty result
     * @param type           which kind of field this is (selects the stored rows)
     * @param needParticiple whether the text is split into words by {@link SentenceAnalyzer}
     *                       or looked up as one word
     * @return CA id to accumulated score; never {@code null}
     */
    private Map<Long, Integer> matcherCA(String cases, Type type, boolean needParticiple) throws Exception {
        Map<Long, Integer> caScoreMapping = new HashMap<>();
        if (cases == null) {
            // A missing field contributes nothing. Without this guard a null
            // would be looked up under the cache key "<type>#null", colliding
            // with the literal word "null".
            return caScoreMapping;
        }

        List<CAMCasesScore> casesScores = new ArrayList<>();
        for (String word : needParticiple ? SentenceAnalyzer.analyzer(cases) : Collections.singleton(cases)) {
            casesScores.addAll(findByTypeAndWord(type.getValue(), word));
        }

        for (CAMCasesScore casesScore : casesScores) {
            Long ca = casesScore.getCa();
            Integer score = casesScore.getScore();

            // Several rows for the same CA id are folded together rather than overwritten.
            Integer previous = caScoreMapping.get(ca);
            if (previous != null) {
                score = calcCumulateScore(previous, score);
            }

            caScoreMapping.put(ca, score);
        }

        return caScoreMapping;
    }

    /**
     * Folds a new score into an accumulated one:
     * {@code old + (10000 - old) / 10000 * cur}, truncated to an int.
     *
     * <p>For inputs within 0..10000 the result stays within 0..10000, so
     * repeated hits move the score towards 10000 without exceeding it.
     */
    private static Integer calcCumulateScore(int old, int cur) {
        return new BigDecimal(10000).subtract(new BigDecimal(old))
                .divide(new BigDecimal(10000), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(cur))
                .add(new BigDecimal(old)).intValue();
    }

    /**
     * Returns the union of all given CA id sets as a new mutable set.
     */
    @SafeVarargs // the varargs array is only read, never stored or exposed
    private static Set<Long> distinctCAs(Set<Long>... cas) {
        Set<Long> distinctCas = new HashSet<Long>();
        for (Set<Long> casGroup : cas) {
            distinctCas.addAll(casGroup);
        }

        return distinctCas;
    }

    /**
     * Looks up the stored scores for a word of the given type, consulting
     * {@link #SCORE_CACHE} before querying the repository.
     *
     * @param type numeric field type as returned by {@code Type.getValue()}
     * @param word the lookup word
     * @return the rows for that type and word; the returned list is the cached
     *         instance and must be treated as read-only
     */
    private List<CAMCasesScore> findByTypeAndWord(int type, String word) {
        String key = type + "#" + word;

        List<CAMCasesScore> scores = SCORE_CACHE.get(key);
        if (scores == null) {
            scores = camCasesScoreRepository.findByTypeAndWord(type, word);
            SCORE_CACHE.put(key, scores);
        }

        return scores;
    }
}