Strings.java Source code

Java tutorial

Introduction

Here is the source code for Strings.java

Source

/*
 * LingPipe v. 3.9
 * Copyright (C) 2003-2010 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

//package com.aliasi.util;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import java.text.DecimalFormat;

/**
 * Static utility methods for processing strings, characters and
 * string buffers.
 *
 * <p>Every method in this class uses the extended notion of
 * whitespace defined by {@link #isWhitespace(char)}, which treats the
 * non-breakable space ({@link #NBSP_CHAR}) as whitespace in addition
 * to the characters accepted by
 * {@link java.lang.Character#isWhitespace(char)}.
 *
 * @author  Bob Carpenter
 * @version 4.0.1
 * @since   LingPipe1.0
 * @see     java.lang.Character
 * @see     java.lang.String
 * @see     java.lang.StringBuilder
 */
public class Strings {

    /**
     * The non-breakable space character (Unicode code point 160,
     * U+00A0).  Declared <code>final</code> so that the definition of
     * whitespace used by this class cannot be altered at runtime.
     */
    public static final char NBSP_CHAR = '\u00A0';

    /**
     * Appends a whitespace-normalized form of the specified character
     * sequence to the specified string buffer.  Initial and final
     * whitespace is not appended, and every other maximal sequence
     * of contiguous whitespace is replaced with a single space
     * character.  For instance, the input <code>&quot; a\tb\n&quot;</code>
     * appends the characters <code>&quot;a b&quot;</code> to the buffer.
     *
     * <P>This command is useful for text inputs for web or GUI
     * applications.
     *
     * @param cs Character sequence whose normalization is appended to
     * the buffer.
     * @param sb String buffer to which the normalized character
     * sequence is appended.
     */
    public static void normalizeWhitespace(CharSequence cs, StringBuilder sb) {
        // Trim trailing whitespace first by shrinking the exclusive end index.
        int end = cs.length();
        while (end > 0 && isWhitespace(cs.charAt(end - 1))) {
            --end;
        }
        // Then skip leading whitespace.
        int i = 0;
        while (i < end && isWhitespace(cs.charAt(i))) {
            ++i;
        }
        // Collapse each interior run of whitespace into one ' '.
        boolean inWhitespace = false;
        for (; i < end; ++i) {
            char c = cs.charAt(i);
            if (isWhitespace(c)) {
                if (!inWhitespace) {
                    sb.append(' ');
                    inWhitespace = true;
                }
            } else {
                inWhitespace = false;
                sb.append(c);
            }
        }
    }

    /**
     * Returns a whitespace-normalized version of the specified
     * character sequence.  See {@link
     * #normalizeWhitespace(CharSequence,StringBuilder)} for
     * information on the normalization procedure.
     *
     * @param cs Character sequence to normalize.
     * @return Normalized version of character sequence.
     */
    public static String normalizeWhitespace(CharSequence cs) {
        StringBuilder sb = new StringBuilder();
        normalizeWhitespace(cs, sb);
        return sb.toString();
    }

    /**
     * Returns <code>true</code> if the specified buffer contains
     * only whitespace characters, as defined by
     * {@link #isWhitespace(char)}.  An empty buffer yields
     * <code>true</code>.
     *
     * @param sb String buffer to test for whitespace.
     * @return <code>true</code> if the specified buffer contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(StringBuilder sb) {
        // Scan the buffer in place rather than copying it to a String.
        return containsOnlyWhitespace(sb);
    }

    /**
     * Returns <code>true</code> if the specified string contains
     * only whitespace characters, as defined by
     * {@link #isWhitespace(char)}.  The empty string yields
     * <code>true</code>.
     *
     * @param s String to test for whitespace.
     * @return <code>true</code> if the specified string contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(String s) {
        // Scan the string in place rather than copying it to a char array.
        return containsOnlyWhitespace(s);
    }

    /**
     * Returns <code>true</code> if the specified range of the
     * specified character array contains only whitespace characters,
     * as defined for characters by {@link #isWhitespace(char c)}.
     * A range of length zero yields <code>true</code>.
     *
     * @param ch Character array to test for whitespace characters in range.
     * @param start Beginning of range to test.
     * @param length Number of characters to test.
     * @return <code>true</code> if the specified range contains only
     * whitespace characters.
     */
    public static boolean allWhitespace(char[] ch, int start, int length) {
        int end = start + length;
        for (int i = start; i < end; ++i) {
            if (!isWhitespace(ch[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if specified character is a whitespace character.
     * The definition in {@link
     * java.lang.Character#isWhitespace(char)} is extended to include
     * the unicode non-breakable space character (unicode 160).
     *
     * @param c Character to test.
     * @return <code>true</code> if specified character is a
     * whitespace.
     * @see java.lang.Character#isWhitespace(char)
     */
    public static boolean isWhitespace(char c) {
        return Character.isWhitespace(c) || c == NBSP_CHAR;
    }

    /**
     * Shared scan used by the string and buffer variants of
     * <code>allWhitespace</code>: returns <code>true</code> if every
     * character of the sequence is whitespace.
     */
    private static boolean containsOnlyWhitespace(CharSequence cs) {
        for (int i = 0, n = cs.length(); i < n; ++i) {
            if (!isWhitespace(cs.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}