Here you can find the source of tokenize(String string)
Parameter | Description |
---|---|
string | String to be tokenized. |
public static Map<String, AtomicInteger> tokenize(String string)
//package com.java2s;
/*
 * Copyright (C) 2014 Dell, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;

/**
 * Static string and byte-buffer helpers: whitespace tokenizing with occurrence counts,
 * splitting by a regular expression or by a single character, and searching a byte[]
 * for a character.
 */
public class Main {
    /**
     * Matches one or more whitespace characters (SP, CR, LF, TAB, FF, VT). Compiled once
     * so that {@link #tokenize(String)} does not recompile the expression on every call.
     */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Parse the given string and return the whitespace-delimited tokens that it contains
     * mapped to the number of occurrences of each token. Only whitespace characters
     * (SP, CR, LF, TAB, FF, VT) are used to delimit tokens. Tokens are down-cased before
     * they are counted, so "Foo" and "FOO" are both counted under the key "foo".
     *
     * @param string String to be tokenized. A null string is treated like an empty one.
     * @return       Map of down-cased tokens to occurrence counts. The map may be empty
     *               but it will not be null.
     */
    public static Map<String, AtomicInteger> tokenize(String string) {
        Map<String, AtomicInteger> result = new HashMap<String, AtomicInteger>();
        if (string == null) {
            return result;
        }
        for (String token : WHITESPACE.split(string)) {
            // Leading whitespace (or an empty input) makes split() produce an empty
            // first element; it is not a token.
            if (token.isEmpty()) {
                continue;
            }
            // Down-case with a fixed locale so the keys do not depend on the JVM's
            // default locale (e.g. Turkish maps 'I' to a dotless i).
            String tokenDown = token.toLowerCase(Locale.ROOT);
            AtomicInteger count = result.get(tokenDown);
            if (count == null) {
                result.put(tokenDown, new AtomicInteger(1));
            } else {
                count.incrementAndGet();
            }
        }
        return result;
    }

    /**
     * Split the given string using the given separator, returning the non-empty
     * components as a set. Duplicate components are collapsed and their original order
     * is not preserved. If a null or empty string is passed, an empty set is returned.
     *
     * <p>Note that the separator is handed to {@link String#split(String)} and is
     * therefore interpreted as a regular expression; use
     * {@link #split(String, char)} for a literal single-character separator.
     *
     * @param str    String to be split. May be null.
     * @param sepStr Separator (regular expression) that lies between values.
     * @return       Set of separated substrings. The set may be empty but it will
     *               not be null.
     */
    public static Set<String> split(String str, String sepStr) {
        Set<String> result = new HashSet<String>();
        if (str == null) {
            return result;
        }
        for (String value : str.split(sepStr)) {
            // Adjacent or leading separators yield empty substrings; drop them.
            if (!value.isEmpty()) {
                result.add(value);
            }
        }
        return result;
    }

    /**
     * Split the given string by a separator character. Unlike
     * {@link #split(String, String)}, this method does not use regular expressions,
     * keeps empty substrings, and preserves order and duplicates. A string that contains
     * no separator (including the empty string) yields a single-element list holding
     * that string.
     *
     * @param str    String to be split. If null, an empty list is returned.
     * @param sepChr Separator character that lies between values.
     * @return       List of separated substrings, in order of appearance. The list may
     *               be empty but it will not be null.
     */
    public static List<String> split(String str, char sepChr) {
        List<String> result = new ArrayList<String>();
        if (str == null) {
            return result;
        }
        int start = 0;
        int sepIdx = str.indexOf(sepChr, start);
        while (sepIdx >= 0) {
            result.add(str.substring(start, sepIdx));
            start = sepIdx + 1;
            sepIdx = str.indexOf(sepChr, start);
        }
        // Whatever follows the last separator (possibly "") is the final component.
        result.add(str.substring(start));
        return result;
    }

    /**
     * Return the first index where the given character occurs in the given buffer or -1
     * if it is not found. This is like String.indexOf() but it works on byte[] arrays.
     * Each byte is compared as an unsigned value (0-255), so characters in the range
     * 0x80-0xFF match the corresponding byte; characters above 0xFF never match. If the
     * given buffer is null or empty, -1 is returned.
     *
     * @param buffer byte[] to search. May be null.
     * @param ch     Character to find.
     * @return       Zero-relative index where the character was first found or -1 if
     *               the character does not occur.
     */
    public static int indexOf(byte[] buffer, char ch) {
        if (buffer == null) {
            return -1;
        }
        for (int index = 0; index < buffer.length; index++) {
            // Mask to undo sign extension: a raw byte >= 0x80 is negative as an int and
            // could never equal a (non-negative) char.
            if ((buffer[index] & 0xFF) == ch) {
                return index;
            }
        }
        return -1;
    }
}