com.fengduo.bee.search.utils.PinyinParser.java Source code

Java tutorial

Introduction

Here is the source code for com.fengduo.bee.search.utils.PinyinParser.java

Source

/*
 * Copyright 2015-2020 Fengduo.com All right reserved. This software is the confidential and proprietary information of
 * Fengduo.com ("Confidential Information"). You shall not disclose such Confidential Information and shall use it only
 * in accordance with the terms of the license agreement you entered into with Fengduo.com.
 */
package com.fengduo.bee.search.utils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

import org.apache.commons.lang.StringUtils;

import com.fengduo.bee.commons.core.lang.Argument;

/**
 * Utility that turns text containing Chinese characters into pinyin spellings by means of pinyin4j.
 * <p>
 * Every character with a char value above 128 is looked up in pinyin4j (lower case, no tone marks); all other
 * characters are copied through unchanged. Because one character may have several readings, a text can yield
 * several spellings; at most {@value #MAX_RESULTS} of them are returned.
 * <p>
 * Internally a blank separates characters and a comma separates the readings of one character, so ASCII blanks and
 * commas in the input contribute no characters to the result.
 * 
 * @author zxc May 28, 2015 2:23:30 PM
 */
public class PinyinParser {

    /** Upper limit on the number of spellings returned by the converter methods. */
    private static final int    MAX_RESULTS       = 5;

    /** Characters with a value above this limit are looked up in pinyin4j; the rest are copied as-is. */
    private static final int    PLAIN_CHAR_LIMIT  = 128;

    /** Separates the entries of two consecutive input characters in the intermediate string. */
    private static final String CHAR_SEPARATOR    = " ";

    /** Separates the alternative readings of a single input character in the intermediate string. */
    private static final String READING_SEPARATOR = ",";

    /**
     * Produces the full pinyin spellings of the given text.
     * 
     * @param chines text to convert; may be {@code null} or empty
     * @return an empty set for {@code null}/empty input, otherwise up to {@value #MAX_RESULTS} spellings built from
     * the combinations of the readings of every character
     */
    public static Set<String> converter2AllSpell(String chines) {
        return convert(chines, false);
    }

    /**
     * Same as {@link #converter2FirstSpell(String)}, but with the spellings joined by commas into a single string.
     * 
     * @param chines text to convert; may be {@code null} or empty
     * @return the comma separated first-letter spellings
     */
    public static String converterToFirstSpell(String chines) {
        return StringUtils.join(converter2FirstSpell(chines), ",");
    }

    /**
     * Produces the first-letter spellings of the given text: every looked-up character is represented by the first
     * letter of each of its readings.
     * 
     * @param chines text to convert; may be {@code null} or empty
     * @return an empty set for {@code null}/empty input, otherwise up to {@value #MAX_RESULTS} spellings
     */
    public static Set<String> converter2FirstSpell(String chines) {
        return convert(chines, true);
    }

    /**
     * Shared implementation of the two converter methods.
     * 
     * @param chines text to convert; may be {@code null} or empty
     * @param firstLetterOnly {@code true} to keep only the first letter of every reading, {@code false} to keep the
     * complete reading
     * @return the resulting spellings, never {@code null}
     */
    private static Set<String> convert(String chines, boolean firstLetterOnly) {
        if (StringUtils.isEmpty(chines)) {
            return Collections.<String>emptySet();
        }
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

        // Intermediate form: "<reading>,<reading> <reading> ..." - one blank-terminated entry per input character.
        StringBuilder encoded = new StringBuilder();
        for (char ch : chines.toCharArray()) {
            if (ch > PLAIN_CHAR_LIMIT) {
                appendReadings(encoded, ch, format, firstLetterOnly);
            } else {
                encoded.append(ch);
            }
            encoded.append(CHAR_SEPARATOR);
        }
        return parseTheChineseByObject(discountTheChinese(encoded.toString()));
    }

    /**
     * Appends the comma separated readings of one character to the buffer. Nothing is appended when pinyin4j
     * returns no readings for the character or rejects the output format.
     * 
     * @param target buffer receiving the readings
     * @param ch character to look up
     * @param format pinyin4j output format to apply
     * @param firstLetterOnly {@code true} to append only the first letter of every reading
     */
    private static void appendReadings(StringBuilder target, char ch, HanyuPinyinOutputFormat format,
                                       boolean firstLetterOnly) {
        String[] readings;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(ch, format);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort: report the problem and treat the character as having no reading.
            e.printStackTrace();
            return;
        }
        if (readings == null) {
            return;
        }
        for (int j = 0; j < readings.length; j++) {
            if (j > 0) {
                target.append(READING_SEPARATOR);
            }
            String reading = readings[j];
            // A null or empty reading contributes nothing; this also protects charAt(0) below.
            if (reading == null || reading.length() == 0) {
                continue;
            }
            if (firstLetterOnly) {
                target.append(reading.charAt(0));
            } else {
                target.append(reading);
            }
        }
    }

    /**
     * Concatenates every element of the prefix {@code termArrays[0, start)} with every string produced by the same
     * operation for {@code start + 1}. The recursion ends at {@code start == size - 1}, where all elements except
     * the last one are returned. If one of the two sides is empty, the other side is returned on its own.
     * <p>
     * NOTE(review): the base case returns the prefix that excludes the last element, and the prefixes overlap
     * between recursion levels - please confirm this is the intended combination rule.
     * 
     * @param termArrays terms to combine; may be {@code null} or empty
     * @param start index at which to start; values outside {@code [0, size)} yield an empty list
     * @return the combined strings; the list never shares storage with {@code termArrays}
     */
    public static List<String> getPermutationSentence(List<String> termArrays, int start) {
        if (Argument.isEmpty(termArrays)) {
            return Collections.<String>emptyList();
        }
        int size = termArrays.size();
        if (start < 0 || start >= size) {
            return Collections.<String>emptyList();
        }
        // Copy instead of handing out a subList view: a view becomes unusable as soon as the caller structurally
        // modifies termArrays, and writes to it would leak into the caller's list.
        List<String> prefix = new ArrayList<String>(termArrays.subList(0, start));
        if (start == size - 1) {
            return prefix;
        }
        List<String> suffixes = getPermutationSentence(termArrays, start + 1);
        if (Argument.isEmpty(prefix)) {
            return suffixes;
        }
        if (Argument.isEmpty(suffixes)) {
            return prefix;
        }
        List<String> result = new ArrayList<String>(prefix.size() * suffixes.size());
        for (String pre : prefix) {
            for (String suffix : suffixes) {
                result.add(pre + suffix);
            }
        }
        return result;
    }

    /**
     * Splits the intermediate string into one map per blank separated entry. The keys of a map are the distinct
     * comma separated readings of that entry, the values count how often each reading occurred.
     * 
     * @param pinyin intermediate string as built by the converter methods
     * @return one map per entry, in input order
     */
    private static List<Map<String, Integer>> discountTheChinese(String pinyin) {
        List<Map<String, Integer>> mapList = new ArrayList<Map<String, Integer>>();
        for (String entry : pinyin.split(CHAR_SEPARATOR)) {
            Map<String, Integer> occurrences = new Hashtable<String, Integer>();
            for (String reading : entry.split(READING_SEPARATOR)) {
                Integer count = occurrences.get(reading);
                if (count == null) {
                    occurrences.put(reading, Integer.valueOf(1));
                } else {
                    // Remove before re-inserting, exactly as before: this may influence the Hashtable's iteration
                    // order, and that order decides which spellings survive the MAX_RESULTS cut later on.
                    occurrences.remove(reading);
                    occurrences.put(reading, Integer.valueOf(count.intValue() + 1));
                }
            }
            mapList.add(occurrences);
        }
        return mapList;
    }

    /**
     * Builds the spellings by combining, in order, every reading of one character with every reading of the next.
     * Entries without any reading are skipped.
     * <p>
     * NOTE(review): the complete cross product is built before it is cut down to {@value #MAX_RESULTS} entries, so
     * the intermediate map grows multiplicatively with the number of characters that have several readings.
     * 
     * @param list per-character reading maps as produced by {@link #discountTheChinese(String)}
     * @return at most {@value #MAX_RESULTS} spellings, taken in the iteration order of the final map
     */
    private static Set<String> parseTheChineseByObject(List<Map<String, Integer>> list) {
        Map<String, Integer> combined = null; // spellings built from the entries processed so far
        for (Map<String, Integer> readings : list) {
            Map<String, Integer> next = new Hashtable<String, Integer>();
            if (combined == null) {
                // First entry with readings: its readings are the initial spellings.
                for (String reading : readings.keySet()) {
                    next.put(reading, 1);
                }
            } else {
                // Extend every spelling built so far with every reading of the current entry.
                for (String head : combined.keySet()) {
                    for (String reading : readings.keySet()) {
                        next.put(head + reading, 1);
                    }
                }
            }
            // An entry without readings leaves the spellings built so far untouched.
            if (!next.isEmpty()) {
                combined = next;
            }
        }
        Set<String> result = new HashSet<String>();
        if (combined != null) {
            for (String spelling : combined.keySet()) {
                result.add(spelling);
                if (result.size() >= MAX_RESULTS) {
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Small manual smoke test that prints both conversions of a sample string.
     * 
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "???";
        System.out.println(converter2FirstSpell(str));
        System.out.println(converter2AllSpell(str));
    }
}