// Java tutorial
/*
 * LingPipe v. 3.9
 * Copyright (C) 2003-2010 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

//package com.aliasi.util;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.DecimalFormat;

/**
 * Static utility methods for processing strings, characters and
 * string buffers.
 *
 * <p>Throughout this class, "whitespace" means any character accepted
 * by {@link #isWhitespace(char)}: everything accepted by
 * {@link java.lang.Character#isWhitespace(char)} plus the non-breakable
 * space {@link #NBSP_CHAR}.
 *
 * @author  Bob Carpenter
 * @version 4.0.1
 * @since   LingPipe1.0
 * @see java.lang.Character
 * @see java.lang.String
 * @see java.lang.StringBuilder
 */
public class Strings {

    /**
     * Appends a whitespace-normalized form of the specified character
     * sequence to the specified string buffer.  Initial and final
     * whitespace is not appended, and every other maximal run of
     * contiguous whitespace is replaced with a single space character
     * (<code>' '</code>).  For instance, the input
     * <code>" a\tb\n"</code> appends the characters
     * <code>"a b"</code> to the buffer.
     *
     * <P>This command is useful for text inputs for web or GUI
     * applications.
     *
     * @param cs Character sequence whose normalization is appended to
     * the buffer.
     * @param sb String buffer to which the normalized character
     * sequence is appended.
     */
    public static void normalizeWhitespace(CharSequence cs, StringBuilder sb) {
        int i = 0;
        int length = cs.length();
        // Trim trailing whitespace first so the main loop never emits a
        // trailing separator.
        while (length > 0 && isWhitespace(cs.charAt(length - 1))) {
            --length;
        }
        // Skip leading whitespace.
        while (i < length && isWhitespace(cs.charAt(i))) {
            ++i;
        }
        boolean inWhiteSpace = false;
        for (; i < length; ++i) {
            char nextChar = cs.charAt(i);
            if (isWhitespace(nextChar)) {
                // Only the first character of a whitespace run produces output.
                if (!inWhiteSpace) {
                    sb.append(' ');
                    inWhiteSpace = true;
                }
            } else {
                inWhiteSpace = false;
                sb.append(nextChar);
            }
        }
    }

    /**
     * Returns a whitespace-normalized version of the specified
     * character sequence.  See {@link
     * #normalizeWhitespace(CharSequence,StringBuilder)} for
     * information on the normalization procedure.
     *
     * @param cs Character sequence to normalize.
     * @return Normalized version of character sequence.
     */
    public static String normalizeWhitespace(CharSequence cs) {
        StringBuilder sb = new StringBuilder();
        normalizeWhitespace(cs, sb);
        return sb.toString();
    }

    /**
     * Returns <code>true</code> if the specified buffer contains
     * only whitespace characters.  An empty buffer yields
     * <code>true</code>.
     *
     * @param sb String buffer to test for whitespace.
     * @return <code>true</code> if the specified buffer contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(StringBuilder sb) {
        return isAllWhitespace(sb);
    }

    /**
     * Returns <code>true</code> if the specified string contains
     * only whitespace characters.  The empty string yields
     * <code>true</code>.
     *
     * @param s String to test for whitespace.
     * @return <code>true</code> if the specified string contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(String s) {
        return isAllWhitespace(s);
    }

    /**
     * Returns <code>true</code> if the specified range of the
     * specified character array contains only whitespace characters,
     * as defined for characters by {@link #isWhitespace(char c)}.
     * A range of length zero or less yields <code>true</code>.
     *
     * @param ch Character array to test for whitespace characters in range.
     * @param start Beginning of range to test.
     * @param length Number of characters to test.
     * @return <code>true</code> if the specified range contains only
     * whitespace characters.
     * @throws ArrayIndexOutOfBoundsException If the scan reaches an
     * index outside the bounds of the array.
     */
    public static boolean allWhitespace(char[] ch, int start, int length) {
        for (int i = start; i < start + length; ++i) {
            if (!isWhitespace(ch[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if specified character is a whitespace character.
     * The definition in {@link
     * java.lang.Character#isWhitespace(char)} is extended to include
     * the unicode non-breakable space character (unicode 160).
     *
     * @param c Character to test.
     * @return <code>true</code> if specified character is a
     * whitespace.
     * @see java.lang.Character#isWhitespace(char)
     */
    public static boolean isWhitespace(char c) {
        return Character.isWhitespace(c) || c == NBSP_CHAR;
    }

    /**
     * Scans a character sequence in place and reports whether every
     * character is whitespace according to {@link #isWhitespace(char)}.
     */
    private static boolean isAllWhitespace(CharSequence cs) {
        int length = cs.length();
        for (int i = 0; i < length; ++i) {
            if (!isWhitespace(cs.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * The non-breakable space character (unicode 160).  Declared
     * <code>final</code> so that the definition of whitespace used by
     * {@link #isWhitespace(char)} cannot be changed at runtime.
     */
    public static final char NBSP_CHAR = (char) 160;

}