Appends a whitespace-normalized form of the specified character sequence to the specified string buffer.
/*
* LingPipe v. 3.9
* Copyright (C) 2003-2010 Alias-i
*
* This program is licensed under the Alias-i Royalty Free License
* Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i
* Royalty Free License Version 1 for more details.
*
* You should have received a copy of the Alias-i Royalty Free License
* Version 1 along with this program; if not, visit
* http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
* Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
* +1 (718) 290-9170.
*/
//package com.aliasi.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.DecimalFormat;
/**
* Static utility methods for processing strings, characters and
* string buffers.
*
* @author Bob Carpenter
* @version 4.0.1
* @since LingPipe1.0
* @see java.lang.Character
* @see java.lang.String
* @see java.lang.StringBuilder
*/
public class Strings {

    /**
     * Appends a whitespace-normalized form of the specified character
     * sequence to the specified string buffer.  Initial and final
     * whitespaces are not appended, and every other maximal sequence
     * of contiguous whitespace is replaced with a single space
     * character.  For instance, the input <code>" a\tb\n"</code>
     * appends the characters <code>"a b"</code> to the buffer.
     * Whitespace is determined by {@link #isWhitespace(char)}, so
     * the non-breakable space is normalized as well.
     *
     * <P>This command is useful for text inputs for web or GUI
     * applications.
     *
     * @param cs Character sequence whose normalization is appended to
     * the buffer.
     * @param sb String buffer to which the normalized character
     * sequence is appended.
     */
    public static void normalizeWhitespace(CharSequence cs, StringBuilder sb) {
        // Trim trailing whitespace first so the main loop never emits
        // a separator that is not followed by a non-whitespace character.
        int end = cs.length();
        while (end > 0 && isWhitespace(cs.charAt(end - 1))) {
            --end;
        }
        // Skip leading whitespace.
        int pos = 0;
        while (pos < end && isWhitespace(cs.charAt(pos))) {
            ++pos;
        }
        boolean inWhitespace = false;
        for ( ; pos < end; ++pos) {
            char c = cs.charAt(pos);
            if (!isWhitespace(c)) {
                inWhitespace = false;
                sb.append(c);
            } else if (!inWhitespace) {
                // Only the first character of a whitespace run emits a space.
                sb.append(' ');
                inWhitespace = true;
            }
        }
    }

    /**
     * Returns a whitespace-normalized version of the specified
     * character sequence.  See {@link
     * #normalizeWhitespace(CharSequence,StringBuilder)} for
     * information on the normalization procedure.
     *
     * @param cs Character sequence to normalize.
     * @return Normalized version of character sequence.
     */
    public static String normalizeWhitespace(CharSequence cs) {
        // The normalized form is never longer than the input, so the
        // input length is a sufficient initial capacity.
        StringBuilder sb = new StringBuilder(cs.length());
        normalizeWhitespace(cs, sb);
        return sb.toString();
    }

    /**
     * Returns <code>true</code> if the specified buffer contains
     * only whitespace characters, as defined by {@link
     * #isWhitespace(char)}.  An empty buffer contains no
     * non-whitespace characters, so the result for it is
     * <code>true</code>.
     *
     * @param sb String buffer to test for whitespace.
     * @return <code>true</code> if the specified buffer contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(StringBuilder sb) {
        return isAllWhitespace(sb);
    }

    /**
     * Returns <code>true</code> if the specified string contains
     * only whitespace characters, as defined by {@link
     * #isWhitespace(char)}.  The empty string contains no
     * non-whitespace characters, so the result for it is
     * <code>true</code>.
     *
     * @param s String to test for whitespace.
     * @return <code>true</code> if the specified string contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(String s) {
        return isAllWhitespace(s);
    }

    /**
     * Returns <code>true</code> if the specified range of the
     * specified character array contains only whitespace characters,
     * as defined for characters by {@link #isWhitespace(char)}.  A
     * range whose length is zero or negative contains no characters
     * to test, so the result for it is <code>true</code>.
     *
     * @param ch Character array to test for whitespace characters in range.
     * @param start Beginning of range to test.
     * @param length Number of characters to test.
     * @return <code>true</code> if the specified range contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(char[] ch, int start, int length) {
        int end = start + length;
        for (int i = start; i < end; ++i) {
            if (!isWhitespace(ch[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if specified character is a whitespace character.
     * The definition in {@link
     * java.lang.Character#isWhitespace(char)} is extended to include
     * the unicode non-breakable space character (unicode 160).
     *
     * @param c Character to test.
     * @return <code>true</code> if specified character is a
     * whitespace.
     * @see java.lang.Character#isWhitespace(char)
     */
    public static boolean isWhitespace(char c) {
        return c == NBSP_CHAR || Character.isWhitespace(c);
    }

    /**
     * Scans the specified character sequence in place, without
     * copying it, and reports whether every character in it is
     * whitespace according to {@link #isWhitespace(char)}.
     *
     * @param cs Character sequence to scan.
     * @return <code>true</code> if no non-whitespace character is found.
     */
    private static boolean isAllWhitespace(CharSequence cs) {
        int length = cs.length();
        for (int i = 0; i < length; ++i) {
            if (!isWhitespace(cs.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * The non-breakable space character (unicode 160).  Declared
     * <code>final</code> so that the definition of whitespace used by
     * this class cannot be changed at runtime.
     */
    public static final char NBSP_CHAR = (char) 160;
}
Related examples in the same category