Java tutorial
/**
 * Copyright (c) 2016, adar.w (adar.w@outlook.com)
 *
 * http://www.smoe.me
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package me.smoe.adar.analyzer.luence;

import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Small demo helpers that run a sentence through a Lucene {@link Analyzer}
 * and expose the resulting terms.
 *
 * <p>Both helpers follow the {@link TokenStream} consumer workflow
 * ({@code reset()}, {@code incrementToken()} until it returns {@code false},
 * {@code end()}, {@code close()}) and release the analyzer and the stream
 * even when tokenization fails.
 */
public class AnalyzerToy {

    /**
     * Tokenizes {@code sentence} with a default-constructed {@link StopAnalyzer}
     * and prints every emitted term to {@code System.out}, each followed by
     * {@code " ,"}. No trailing newline is written.
     *
     * @param sentence the text to analyze; must not be {@code null}
     * @throws Exception if the analyzer or the token stream fails
     */
    public static void analyzerByStop(String sentence) throws Exception {
        // try-with-resources closes the stream first, then the analyzer, on every exit path.
        try (Analyzer analyzer = new StopAnalyzer();
                TokenStream tokenStream = analyzer.tokenStream(StringUtils.EMPTY, new StringReader(sentence))) {
            // addAttribute returns the shared attribute instance; it is updated in place
            // on every incrementToken(), so a single lookup is enough.
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);

            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.print(term.toString() + " ,");
            }
            // Required end-of-stream step of the TokenStream contract.
            tokenStream.end();
        }
    }

    /**
     * Tokenizes {@code sentence} with a default-constructed {@link StandardAnalyzer}
     * and collects the distinct terms.
     *
     * @param sentence the text to analyze; must not be {@code null}
     * @return a mutable set holding each distinct term once; iteration order is unspecified
     * @throws Exception if the analyzer or the token stream fails
     */
    public static Set<String> analyzerByStandard(String sentence) throws Exception {
        try (Analyzer analyzer = new StandardAnalyzer();
                TokenStream tokenStream = analyzer.tokenStream(StringUtils.EMPTY, new StringReader(sentence))) {
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);

            Set<String> words = new HashSet<>();
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                // toString() copies the current term text; the attribute itself is reused.
                words.add(term.toString());
            }
            // Required end-of-stream step of the TokenStream contract.
            tokenStream.end();
            return words;
        }
    }
}