Java tutorial
/** * Copyright (c) 2016, adar.w (adar.w@outlook.com) * * http://www.smoe.me * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package me.smoe.adar.utils.cam.o.common; import java.io.StringReader; import java.util.Collections; import java.util.LinkedHashSet; import java.util.Set; import org.apache.commons.lang.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class SentenceAnalyzer { public static Set<String> analyzer(String sentence) throws Exception { if (StringUtils.isEmpty(sentence)) { return Collections.emptySet(); } Analyzer analyzer = new StandardAnalyzer(); try { TokenStream tokenStream = analyzer.tokenStream(StringUtils.EMPTY, new StringReader(sentence)); tokenStream.addAttribute(CharTermAttribute.class); tokenStream.reset(); Set<String> words = new LinkedHashSet<>(); while (tokenStream.incrementToken()) { String word = ((CharTermAttribute) tokenStream.getAttribute(CharTermAttribute.class)).toString(); if (word.length() <= 1) { continue; } words.add(word); } return words; } finally { analyzer.close(); } } }