com.hangum.tadpole.commons.util.UnicodeUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.hangum.tadpole.commons.util.UnicodeUtils.java

Source

/*******************************************************************************
 * Copyright (c) 2013 hangum.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v2.1
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 * 
 * Contributors:
 *     hangum - initial API and implementation
 ******************************************************************************/
package com.hangum.tadpole.commons.util;

import java.lang.Character.UnicodeBlock;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

/**
 * Utility for reducing a string to the characters that belong to a fixed set
 * of Unicode blocks: Basic Latin plus the Hangul, CJK ideograph, Hiragana and
 * Katakana blocks listed in {@link #isAllowedBlock(UnicodeBlock)}.
 *
 * @author hangum
 *
 */
public class UnicodeUtils {
    private static final Logger logger = Logger.getLogger(UnicodeUtils.class);

    /**
     * Code point 160 (U+00A0, NO-BREAK SPACE). The original comment on this
     * value mentions HTML, so it presumably originates from {@code &nbsp;} in
     * HTML content; it is replaced with a regular space.
     */
    private static final int NO_BREAK_SPACE = 160;

    /**
     * Returns a copy of {@code content} that keeps only Basic Latin characters
     * and characters from the allowed Hangul/CJK/Kana blocks. A no-break space
     * (code point 160) is replaced with a regular space; every other character
     * is dropped. The result is trimmed.
     *
     * <p>The input is processed by Unicode code point rather than by UTF-16
     * {@code char}, so supplementary-plane characters (for example CJK Unified
     * Ideographs Extension B) are classified by their real block instead of
     * being seen as two surrogates and discarded.
     *
     * @param content the text to filter; may be {@code null}
     * @return the filtered and trimmed text, or an empty string when
     *         {@code content} is {@code null} or nothing remains
     */
    public static String getUnicode(String content) {
        if (content == null) {
            return "";
        }

        StringBuilder sbData = new StringBuilder(content.length());

        for (int i = 0; i < content.length();) {
            int codePoint = content.codePointAt(i);
            i += Character.charCount(codePoint);

            // UnicodeBlock.of returns null for code points outside any defined
            // block, so every comparison keeps the constant on the left.
            UnicodeBlock ub = UnicodeBlock.of(codePoint);

            if (UnicodeBlock.BASIC_LATIN.equals(ub)) {
                sbData.appendCodePoint(codePoint);
                continue;
            }

            if (logger.isDebugEnabled()) {
                logger.debug("[unicode] [" + new String(Character.toChars(codePoint)) + "]");
            }

            if (isAllowedBlock(ub)) {
                sbData.appendCodePoint(codePoint);
            } else if (codePoint == NO_BREAK_SPACE) {
                sbData.append(' ');
            }
            // Anything else is intentionally dropped.
        }

        return StringUtils.trimToEmpty(sbData.toString());
    }

    /**
     * Tells whether {@code ub} is one of the non-Latin blocks that are kept.
     *
     * @param ub the block to test; may be {@code null}
     * @return {@code true} for the Hangul, CJK ideograph, Hiragana and
     *         Katakana blocks listed below, {@code false} otherwise
     */
    private static boolean isAllowedBlock(UnicodeBlock ub) {
        return UnicodeBlock.HANGUL_SYLLABLES.equals(ub)
                || UnicodeBlock.HANGUL_COMPATIBILITY_JAMO.equals(ub)
                || UnicodeBlock.HANGUL_JAMO.equals(ub)
                || UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS.equals(ub)
                || UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A.equals(ub)
                || UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B.equals(ub)
                || UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS.equals(ub)
                || UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT.equals(ub)
                || UnicodeBlock.HIRAGANA.equals(ub)
                || UnicodeBlock.KATAKANA.equals(ub)
                || UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS.equals(ub);
    }
}