Java AtomicInteger tokenize(String string)

Description

Parse the given string and return a map from each whitespace-delimited token (lower-cased) to the number of times that token occurs.

License

Apache License

Parameter

Parameter	Description
string	String to be tokenized.

Return

Map of tokens to occurrence counts.

Declaration

public static Map<String, AtomicInteger> tokenize(String string)

Method Source Code

//package com.java2s;
/*
 * Copyright (C) 2014 Dell, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;

public class Main {
    /** Matches a run of one or more whitespace characters; compiled once and reused. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Parse the given string and return a map from each whitespace-delimited token to
     * the number of times it occurs. Only whitespace characters (SP, CR, LF, TAB, FF,
     * VT) delimit tokens. Tokens are lower-cased before counting, so "Foo" and "foo"
     * are counted as the same token. Lower-casing is locale-independent, so the keys
     * do not vary with the JVM's default locale.
     *
     * @param string    String to be tokenized. May be null, in which case an empty
     *                  map is returned.
     * @return          Map of lower-cased tokens to occurrence counts. The map may be
     *                  empty but it will not be null.
     */
    public static Map<String, AtomicInteger> tokenize(String string) {
        Map<String, AtomicInteger> result = new HashMap<String, AtomicInteger>();
        if (string == null) {
            return result;
        }
        for (String token : WHITESPACE.split(string)) {
            // Leading whitespace (or an empty input) yields one empty leading element.
            if (token.isEmpty()) {
                continue;
            }

            // Locale.ROOT keeps the mapping stable, e.g. 'I' -> 'i' even under a
            // Turkish default locale where it would otherwise become dotless 'ı'.
            String tokenDown = token.toLowerCase(Locale.ROOT);
            AtomicInteger count = result.get(tokenDown);
            if (count == null) {
                result.put(tokenDown, new AtomicInteger(1));
            } else {
                count.incrementAndGet();
            }
        }
        return result;
    }

    /**
     * Split the given string using the given separator, returning the non-empty
     * components as a set. Duplicate components collapse into one entry and the
     * original order is not preserved. If a null or empty string is passed, an empty
     * set is returned.
     *
     * @param str       String to be split. May be null.
     * @param sepStr    Separator that lies between values. It is handed to
     *                  {@link String#split(String)} and is therefore interpreted as a
     *                  regular expression.
     * @return          Set of separated substrings. The set may be empty but it will
     *                  not be null.
     */
    public static Set<String> split(String str, String sepStr) {
        // Split but watch out for empty substrings.
        Set<String> result = new HashSet<String>();
        if (str != null) {
            for (String value : str.split(sepStr)) {
                if (value.length() > 0) {
                    result.add(value);
                }
            }
        }
        return result;
    }

    /**
     * Split the given string by a separator char. Unlike split(String,String), this
     * does not use a regular expression, keeps empty components, and preserves order.
     * A non-null string without any separator yields a single-element list holding
     * the string itself (so "" yields [""]).
     *
     * @param str       String to be split. May be null, in which case an empty list
     *                  is returned.
     * @param sepChr    Separator character that lies between values.
     * @return          List of separated substrings, in order of appearance. The list
     *                  will not be null.
     */
    public static List<String> split(String str, char sepChr) {
        List<String> result = new ArrayList<String>();
        if (str == null) {
            return result;
        }
        int idx = 0;
        while (true) {
            int idx2 = str.indexOf(sepChr, idx);
            if (idx2 < 0) {
                // No more separators: the remainder (possibly empty) is the last value.
                result.add(str.substring(idx));
                break;
            }
            result.add(str.substring(idx, idx2));
            idx = idx2 + 1;
        }
        return result;
    }

    /**
     * Return the first index where the given character occurs in the given buffer or -1
     * if is not found. This is like String.indexOf() but it works on byte[] arrays. Each
     * byte is treated as an unsigned value (0-255) when compared with the character, so
     * characters above U+00FF never match. If the given buffer is null or empty, -1 is
     * returned.
     *
     * @param buffer    byte[] to search.
     * @param ch        Character to find.
     * @return          Zero-relative index where character was first found or -1 if the
     *                  character does not occur.
     */
    public static int indexOf(byte[] buffer, char ch) {
        if (buffer == null) {
            return -1;
        }
        for (int index = 0; index < buffer.length; index++) {
            // Mask to undo sign extension; otherwise bytes 0x80-0xFF become negative
            // ints and can never equal a (non-negative) char.
            if ((buffer[index] & 0xFF) == ch) {
                return index;
            }
        }
        return -1;
    }
}

Java AtomicInteger tokenize(String string)

Description

License

Parameter

Return

Declaration

Method Source Code

Related