Here you can find the source of tokenize(String s)
Parameter | Description |
---|---|
s | - the string to split into tokens |
public static List<String> tokenize(String s)
//package com.java2s;
/**
 * Copyright 2016
 * Ivan Cantador
 * Information Retrieval Group at Universidad Autonoma de Madrid
 *
 * This is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * the current software. If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.ArrayList;
import java.util.List;

public class Main {
    /**
     * Splits a given string into lower-cased tokens separated by ' ' or '_',
     * and according to the Camel case notation.
     *
     * <p>Rules applied while scanning the input left to right:
     * <ul>
     *   <li>a space or an underscore ends the current token;</li>
     *   <li>an upper-case letter ends the current token and starts a new one
     *       with its lower-case form, so every upper-case letter begins its
     *       own token (e.g. {@code "HTTP"} yields {@code h, t, t, p});</li>
     *   <li>digits are dropped and do not end the current token;</li>
     *   <li>any other character is appended to the current token as is.</li>
     * </ul>
     *
     * <p>Empty tokens are never returned: leading, trailing or consecutive
     * separators, as well as an empty or digit-only input, contribute nothing
     * to the result.
     *
     * @param s - the string to split into tokens; may be {@code null}
     *
     * @return the list of tokens in the input string, in order of appearance;
     *         an empty list if {@code s} is {@code null} or holds no tokens
     */
    public static List<String> tokenize(String s) {
        List<String> tokens = new ArrayList<String>();
        if (s == null) {
            return tokens;
        }

        StringBuilder currentToken = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == ' ' || c == '_') {
                // Explicit separator: close the token collected so far.
                flushToken(currentToken, tokens);
            } else if (Character.isDigit(c)) {
                // Digits are ignored and do not break the current token.
                continue;
            } else if (Character.isUpperCase(c)) {
                // Camel case boundary: the upper-case letter opens a new token.
                flushToken(currentToken, tokens);
                currentToken.append(Character.toLowerCase(c));
            } else {
                currentToken.append(c);
            }
        }
        flushToken(currentToken, tokens);
        return tokens;
    }

    /**
     * Appends the pending token to the result if it is non-empty, then resets it.
     */
    private static void flushToken(StringBuilder currentToken, List<String> tokens) {
        if (currentToken.length() > 0) {
            tokens.add(currentToken.toString());
            currentToken.setLength(0);
        }
    }
}