Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * SimpleStringTokenizer.java * Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand */ package weka.core.tokenizers; import java.util.StringTokenizer; import weka.core.RevisionUtils; /** * <!-- globalinfo-start --> A simple tokenizer that is using the * java.util.StringTokenizer class to tokenize the strings. * <p/> * <!-- globalinfo-end --> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -delimiters <value> * The delimiters to use * (default ' \r\n\t.,;:'"()?!'). * </pre> * * <!-- options-end --> * * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public class WordTokenizer extends CharacterDelimitedTokenizer { /** for serialization */ private static final long serialVersionUID = -930893034037880773L; /** the actual tokenizer */ protected transient StringTokenizer m_Tokenizer; /** * Returns a string describing the stemmer * * @return a description suitable for displaying in the explorer/experimenter * gui */ @Override public String globalInfo() { return "A simple tokenizer that is using the java.util.StringTokenizer " + "class to tokenize the strings."; } /** * Tests if this enumeration contains more elements. * * @return true if and only if this enumeration object contains at least one * more element to provide; false otherwise. */ @Override public boolean hasMoreElements() { return m_Tokenizer.hasMoreElements(); } /** * Returns the next element of this enumeration if this enumeration object has * at least one more element to provide. * * @return the next element of this enumeration. */ @Override public String nextElement() { return m_Tokenizer.nextToken(); } /** * Sets the string to tokenize. Tokenization happens immediately. * * @param s the string to tokenize */ @Override public void tokenize(String s) { m_Tokenizer = new StringTokenizer(s, getDelimiters()); } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision$"); } /** * Runs the tokenizer with the given options and strings to tokenize. The * tokens are printed to stdout. * * @param args the commandline options and strings to tokenize */ public static void main(String[] args) { runTokenizer(new WordTokenizer(), args); } }