jef.tools.string.CharUtils.java Source code

Java tutorial

Introduction

Here is the source code for jef.tools.string.CharUtils.java

Source

/*
 * JEF - Copyright 2009-2010 Jiyi (mr.jiyi@gmail.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package jef.tools.string;

import jef.common.wrapper.IntRange;
import jef.common.wrapper.IntRangeGroup;
import jef.tools.StringUtils;

/**
 * Character classification and half-width / full-width conversion helpers,
 * layered on top of the commons-lang {@code CharUtils}.
 * <p>
 * In this class "SBC" denotes the full-width forms (U+FF01..U+FF5E plus the
 * ideographic space U+3000) and "DBC" denotes the ordinary half-width ASCII
 * forms (33..126 plus the space 32).
 */
public final class CharUtils extends org.apache.commons.lang.CharUtils {
   /*
    * Background notes on Java primitive widths (rewritten from a partially
    * unreadable original comment; NOTE(review): compare with the upstream
    * source if the exact wording matters).
    *
    * - long is 8 bytes, int is 4 bytes, short and char are 2 bytes, byte is 1.
    * - short is signed (-32768 ~ 32767); char is unsigned (0 ~ 65535, i.e.
    *   0x0000 ~ 0xFFFF).
    * - Stream/reader read() methods return an int so that -1 can signal end of
    *   input. Test for -1 on the int BEFORE casting to char: a char can never
    *   be equal to -1.
    * - byte is signed (-128 ~ 127). To view a byte as an unsigned value 0 ~ 255:
    *     int unsignedByte = signedByte >= 0 ? signedByte : 256 + signedByte;
    * - Arithmetic equivalent of the (byte) cast applied to an int:
    *     int temp = intValue % 256;
    *     if (intValue < 0) { byteValue = temp < -128 ? 256 + temp : temp; }
    *     else              { byteValue = temp > 127 ? temp - 256 : temp; }
    */

   /**
    * The ASCII decimal digits.
    */
   public static final char[] NUMBERS = "0123456789".toCharArray();
   /**
    * The hexadecimal digits, with both upper-case and lower-case letters.
    */
   public static final char[] HEX_NUMBERS = "0123456789ABCDEFabcdef".toCharArray();
   /**
    * The ASCII upper-case letters.
    */
   public static final char[] ALPHA_UPPERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();
   /**
    * The ASCII lower-case letters.
    */
   public static final char[] ALPHA_LOWERS = "abcdefghijklmnopqrstuvwxyz".toCharArray();
   /**
    * The ASCII letters, upper case followed by lower case.
    */
   public static final char[] ALPHAS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".toCharArray();
   /**
    * The printable ASCII characters that are neither letters nor digits
    * (the space character is included).
    */
   public static final char[] SYMBOLS = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".toCharArray();
   /**
    * The ASCII digits, the ASCII letters and the underscore.
    */
   public static final char[] ALPHA_NUM_UNDERLINE = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".toCharArray();
   /**
    * Characters treated as legal inside a URL.
    */
   public static final char[] CHARS_IN_URL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890_&?=#%;~,./-+".toCharArray();

   // Search / replacement character sets handed to StringUtils.replaceChars by
   // toGB18030(String); presumably the character at index i of JAP_POINT is
   // replaced by the character at index i of CHN_POINT -- confirm against
   // jef.tools.StringUtils.
   // NOTE(review): several char literals in the two lines below were lost in a
   // text-encoding round trip (the empty '' literals do not compile, and the
   // '?' in CHN_POINT is probably a lost character as well). The lines are kept
   // exactly as found; restore the literals from the upstream source.
   public static final String JAP_POINT = new String(new char[] { 12539, 65381, 40658, 65378, 65379, 9834, 12316, 65533, 8722, 8810, 8811, 63, '' });
   public static final String CHN_POINT = new String(new char[] { 183, 183, 40657, '', '?', 65374, 65374, 13199, '-', '', '', '', ' ' });

   /**
    * Distance between a half-width ASCII character (33..126) and its
    * full-width counterpart (65281..65374, i.e. U+FF01..U+FF5E).
    */
   private static final int FULL_WIDTH_OFFSET = 65248;

   /**
    * Tests whether the character is an ASCII decimal digit.
    * 
    * @param c the character to test
    * @return true if char is a number
    */
   public static final boolean isNumber(char c) {
      return c >= '0' && c <= '9';
   }

   /**
    * Tests whether the character is a space: either the ASCII space (32) or
    * the ideographic (full-width) space U+3000 (12288).
    * 
    * @param c the character to test
    * @return true if the char is space (chinese space included)
    */
   public static final boolean isSpace(char c) {
      return c == ' ' || c == SBC_SPACE;
   }

   /**
    * Tests whether the character is an ASCII upper-case letter.
    * 
    * @param c the character to test
    * @return true if the char is an alphabetic character in upper case
    */
   public static final boolean isUpperAlpha(char c) {
      return c >= 'A' && c <= 'Z';
   }

   /**
    * Tests whether the character is an ASCII lower-case letter.
    * 
    * @param c the character to test
    * @return true if the char is an alphabetic character in lower case
    */
   public static final boolean isLowerAlpha(char c) {
      return c >= 'a' && c <= 'z';
   }

   /**
    * Tests whether the character is a printable ASCII character that is
    * neither a letter nor a digit. The ASCII space (32) counts as a symbol
    * here, matching {@link #SYMBOLS}.
    * 
    * @param c the character to test
    * @return true if the char is a symbol
    */
   public static boolean isSymbol(char c) {
      return (c >= ' ' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~');
   }

   /**
    * Tests whether the character lies outside the range 32..255.
    * <p>
    * NOTE(review): besides the control characters below 32 this also reports
    * every character above 255 (CJK text included), which does not match the
    * "not a visible character" wording of the return description. The
    * behaviour is kept because callers may depend on it.
    * 
    * @param c the character to test
    * @return true if the char is not a visible character
    */
   public static final boolean isCtrl(char c) {
      return (c < 32) || c > 255;
   }

   /**
    * Tests whether the character lies in the range U+4E00..U+9FA5.
    * 
    * @param c the character to test
    * @return true if the char is within U+4E00..U+9FA5
    */
   public static boolean isChinese(char c) {
      return c >= 0x4e00 && c <= 0x9fa5;
   }

   /**
    * Tests whether the character is above 255 and is not U+FEFF (65279, the
    * byte order mark).
    * 
    * @param c the character to test
    * @return <tt>true</tt> if char is chinese or japanese.., otherwise
    *         <tt>false</tt>
    */
   public static boolean isAsian(char c) {
      return (c > 255 && c != 65279);
   }

   /** Full-width upper-case letters, built from 65313 (U+FF21) and 65338 (U+FF3A). */
   public static final IntRange SBC_ALPHA_UPPER = new IntRange(65313, 65338);
   /** Full-width lower-case letters, built from 65345 (U+FF41) and 65370 (U+FF5A). */
   public static final IntRange SBC_ALPHA_LOWER = new IntRange(65345, 65370);
   /** Full-width letters of either case. */
   public static final IntRangeGroup SBC_ALPHA = new IntRangeGroup(SBC_ALPHA_LOWER, SBC_ALPHA_UPPER);
   /** Full-width digits, built from 65296 (U+FF10) and 65305 (U+FF19). */
   public static final IntRange SBC_NUMBER = new IntRange(65296, 65305);
   /** Full-width forms of ASCII 33..126, built from 65281 (U+FF01) and 65374 (U+FF5E). */
   public static final IntRange SBC_CHARS_WITHOUT_SPACE = new IntRange(65281, 65374);
   /** The ideographic (full-width) space, U+3000. */
   public static final char SBC_SPACE = (char) 12288;

   /**
    * Tests whether the character is a full-width digit, i.e. is contained in
    * {@link #SBC_NUMBER}.
    * 
    * @param c the character to test
    * @return true if the char is a full-width digit
    */
   public static final boolean isNumberSBC(char c) {
      return SBC_NUMBER.contains(c);
   }

   /**
    * Tests whether the character is a full-width letter, i.e. is contained in
    * {@link #SBC_ALPHA}.
    * 
    * @param c the character to test
    * @return true if the char is a full-width letter
    */
   public static final boolean isAlphaSBC(char c) {
      return SBC_ALPHA.contains((int) c);
   }

   /**
    * Tests whether the character lies in the range 12449..12542
    * (U+30A1..U+30FE).
    * 
    * @param c the character to test
    * @return true if the char is a katakana character
    */
   public static final boolean isKatakana(char c) {
      return (c >= 12449 && c <= 12542);
   }

   /**
    * Tests whether the character lies in the range 12353..12435
    * (U+3041..U+3093).
    * 
    * @param c the character to test
    * @return true if the char is a hiragana character
    */
   public static final boolean isHiragana(char c) {
      return (c >= 12353 && c <= 12435);
   }

   /**
    * Classifies a character. The checks run in a fixed order and the first
    * match wins:
    * ASCII letter, ASCII digit, ASCII symbol (this includes the ASCII space),
    * space (so in practice only the ideographic space), katakana, hiragana,
    * full-width letter, full-width digit, punctuation, control, and finally
    * {@link CharType#ASIAN} for everything else.
    * <p>
    * The control check deliberately runs after the specific checks and skips
    * characters accepted by {@link #isAsian(char)}: {@link #isCtrl(char)} is
    * true for every character above 255, so testing it earlier would make the
    * katakana, hiragana, full-width, punctuation and ASIAN results
    * unreachable for exactly the characters they describe.
    * <p>
    * Characters in 127..255 that are not punctuation are reported as
    * {@link CharType#ASIAN}; this is long-standing behaviour and is preserved.
    * 
    * @param c the character to classify
    * @return enum CharType
    */
   public static CharType getType(char c) {
      if (isUpperAlpha(c) || isLowerAlpha(c)) {
         return CharType.ALPHA;
      }
      if (isNumber(c)) {
         return CharType.NUMBER;
      }
      if (isSymbol(c)) {
         return CharType.SYMBOL;
      }
      if (isSpace(c)) {
         return CharType.SPACE;
      }
      if (isKatakana(c)) {
         return CharType.KATAKANA;
      }
      if (isHiragana(c)) {
         return CharType.HIRAGANA;
      }
      if (isAlphaSBC(c)) {
         return CharType.ALPHA_SBC;
      }
      if (isNumberSBC(c)) {
         return CharType.NUMBER_SBC;
      }
      if (isPunctuation(c)) {
         return CharType.PUNCTUATION;
      }
      // Only characters below 32 and U+FEFF reach CTRL from here.
      if (isCtrl(c) && !isAsian(c)) {
         return CharType.CTRL;
      }
      return CharType.ASIAN;
   }

   /**
    * Tests whether the Unicode general category of the character is one of
    * DASH_PUNCTUATION, START_PUNCTUATION, END_PUNCTUATION,
    * CONNECTOR_PUNCTUATION, OTHER_PUNCTUATION or MATH_SYMBOL (category values
    * 20 to 25).
    * <p>
    * NOTE(review): MATH_SYMBOL is inside the accepted range while
    * INITIAL_QUOTE_PUNCTUATION and FINAL_QUOTE_PUNCTUATION are not; confirm
    * that this is intended.
    * 
    * @param c the character to test
    * @return true if the category of the char is in the range described above
    */
   public static boolean isPunctuation(char c) {
      int type = Character.getType(c);
      return type >= Character.DASH_PUNCTUATION && type <= Character.MATH_SYMBOL;
   }

   /**
    * The classifications produced by {@link CharUtils#getType(char)}.
    */
   public enum CharType {
      /**
       * ASCII letter.
       */
      ALPHA,
      /**
       * ASCII digit.
       */
      NUMBER,
      /**
       * ASCII symbol (see {@link CharUtils#isSymbol(char)}).
       */
      SYMBOL,
      /**
       * Control character (see {@link CharUtils#isCtrl(char)}).
       */
      CTRL,
      /**
       * Space (see {@link CharUtils#isSpace(char)}).
       */
      SPACE,
      /**
       * Punctuation (see {@link CharUtils#isPunctuation(char)}).
       */
      PUNCTUATION,
      /**
       * Full-width letter.
       */
      ALPHA_SBC,
      /**
       * Katakana.
       */
      KATAKANA,
      /**
       * Hiragana.
       */
      HIRAGANA,
      /**
       * Full-width digit.
       */
      NUMBER_SBC,
      /**
       * Any other character.
       */
      ASIAN,
   }

   /**
    * Converts every half-width character of the input to its full-width (SBC
    * case) form by applying {@link #toSBC(char)} to each character.
    * 
    * @param input the text to convert, may be null
    * @return the converted text, or null if the input is null
    */
   public static String toSBC(String input) {
      if (input == null) {
         return null;
      }
      char[] chars = input.toCharArray();
      for (int i = 0; i < chars.length; i++) {
         chars[i] = toSBC(chars[i]);
      }
      return new String(chars);
   }

   /**
    * Alias of {@link #toSBC(String)}, kept under its historical name for
    * existing callers.
    * 
    * @param input the text to convert, may be null
    * @return the converted text, or null if the input is null
    */
   public static String ToSBC(String input) {
      return toSBC(input);
   }

   /**
    * Converts a half-width character to its full-width (SBC case) form: the
    * ASCII space (32) becomes the ideographic space (12288) and the printable
    * ASCII characters 33..126 are shifted by 65248 to 65281..65374. Every
    * other character, control characters included, is returned unchanged.
    * 
    * @param c the character to convert
    * @return the full-width form, or {@code c} itself if it has none
    */
   public static char toSBC(char c) {
      if (c == ' ') {
         return SBC_SPACE;
      }
      // The lower bound matters: shifting control characters such as '\n'
      // would produce unrelated code points.
      if (c >= 33 && c <= 126) {
         return (char) (c + FULL_WIDTH_OFFSET);
      }
      return c;
   }

   /**
    * Converts every full-width character of the input to its half-width (DBC
    * case) form by applying {@link #toDBC(char)} to each character.
    * 
    * @param input the text to convert, may be null
    * @return the converted text, or null if the input is null
    */
   public static String toDBC(String input) {
      if (input == null) {
         return null;
      }
      char[] chars = input.toCharArray();
      for (int i = 0; i < chars.length; i++) {
         chars[i] = toDBC(chars[i]);
      }
      return new String(chars);
   }

   /**
    * Converts a full-width character to its half-width (DBC case) form: the
    * ideographic space (12288) becomes the ASCII space (32) and the
    * characters 65281..65374 are shifted down by 65248 to 33..126. Every
    * other character is returned unchanged.
    * 
    * @param c the character to convert
    * @return the half-width form, or {@code c} itself if it has none
    */
   public static char toDBC(char c) {
      if (c == SBC_SPACE) {
         return ' ';
      }
      if (c >= 65281 && c <= 65374) {
         return (char) (c - FULL_WIDTH_OFFSET);
      }
      return c;
   }

   /**
    * Rewrites a line in two steps: the characters of {@link #JAP_POINT} are
    * first replaced through {@code StringUtils.replaceChars} using
    * {@link #CHN_POINT}, then every remaining character in 65380..65439
    * (U+FF64..U+FF9F) is shifted down by 52933.
    * <p>
    * Judging by the method name the goal is text that can be represented in
    * GB18030 -- NOTE(review): confirm. Also note that the second step is a
    * constant offset, which does not follow the Unicode ordering of
    * full-width katakana (for example U+FF71 becomes U+30AC); confirm that
    * this mapping is intended.
    * 
    * @param line the text to convert, may be null
    * @return the converted text, or null if the input is null
    */
   public static String toGB18030(String line) {
      if (line == null) {
         return null;
      }
      String replaced = StringUtils.replaceChars(line, JAP_POINT, CHN_POINT);
      char[] chars = replaced.toCharArray();
      boolean changed = false;
      for (int i = 0; i < chars.length; i++) {
         char c = chars[i];
         if (c > 65379 && c < 65440) {
            chars[i] = (char) (c - 52933);
            changed = true;
         }
      }
      // Build a new String only when the loop actually changed something.
      return changed ? new String(chars) : replaced;
   }
}