Java tutorial
/*
 * Copyright 2012 Nabeel Mukhtar
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package net.sf.jtmt.tokenizers;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.BreakIterator;

import org.apache.commons.io.FileUtils;

import com.ibm.icu.text.RuleBasedBreakIterator;

/**
 * Splits a text into sentences using an ICU {@link RuleBasedBreakIterator}
 * whose break rules are loaded from a classpath resource.
 *
 * <p>Typical use: construct, call {@link #setText(String)}, then call
 * {@link #nextSentence()} repeatedly until it returns {@code null}.
 */
public class SentenceTokenizer {

    /** Classpath location of the rules used by the no-argument constructor. */
    private static final String DEFAULT_RULES_RESOURCE = "/resources/jtmt/sentence_break_rules.txt";

    /** The text currently being tokenized; {@code null} until {@link #setText(String)} is called. */
    private String text;

    /** Offset in {@link #text} at which the next sentence starts. */
    private int index = 0;

    /** The break iterator that locates sentence boundaries. */
    private final RuleBasedBreakIterator breakIterator;

    /**
     * Instantiates a new sentence tokenizer using the default rules resource
     * {@code /resources/jtmt/sentence_break_rules.txt}.
     *
     * @throws Exception if the rules cannot be loaded or the break iterator
     *         cannot be built from them
     */
    public SentenceTokenizer() throws Exception {
        this(DEFAULT_RULES_RESOURCE);
    }

    /**
     * Instantiates a new sentence tokenizer.
     *
     * <p>The rules are read as a stream rather than through a {@link File},
     * so they are found both when the resource is a plain file on disk and
     * when it is packaged inside a JAR, and regardless of URL-escaped
     * characters (for example spaces) in the installation path.
     *
     * @param rulesfile classpath resource name of the UTF-8 encoded break
     *        rules, resolved relative to this object's class
     * @throws FileNotFoundException if no such resource exists on the classpath
     * @throws Exception if the rules cannot be read or the break iterator
     *         cannot be built from them
     */
    public SentenceTokenizer(String rulesfile) throws Exception {
        super();
        this.breakIterator = new RuleBasedBreakIterator(readResource(getClass(), rulesfile));
    }

    /**
     * Sets the text to tokenize and restarts iteration from its beginning.
     *
     * @param text the new text; must not be {@code null}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public void setText(String text) {
        // Reject null before touching any field so a failed call leaves the
        // previously configured text and position intact.
        if (text == null) {
            throw new NullPointerException("text");
        }
        this.text = text;
        this.breakIterator.setText(text);
        this.index = 0;
    }

    /**
     * Returns the next sentence of the current text. Expects
     * {@link #setText(String)} to have been called first.
     *
     * @return the substring between the previous boundary and the next one,
     *         or {@code null} when the break iterator reports no further
     *         boundary
     */
    public String nextSentence() {
        int end = breakIterator.next();
        // The iterator signals exhaustion with the DONE sentinel.
        if (end == BreakIterator.DONE) {
            return null;
        }
        String sentence = text.substring(index, end);
        // The end of this sentence is the start of the next one.
        index = end;
        return sentence;
    }

    /**
     * Reads a classpath resource fully into a string, decoding it as UTF-8.
     *
     * @param owner the class used to resolve the resource name
     * @param resource the resource name
     * @return the resource content
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException if reading fails
     */
    private static String readResource(Class<?> owner, String resource) throws IOException {
        InputStream in = owner.getResourceAsStream(resource);
        if (in == null) {
            throw new FileNotFoundException("Sentence break rules resource not found: " + resource);
        }
        try {
            Reader reader = new InputStreamReader(in, "UTF-8");
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[4096];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
            return content.toString();
        } finally {
            in.close();
        }
    }
}