me.smoe.adar.analyzer.luence.AnalyzerToy.java Source code

Java tutorial

Introduction

Here is the source code for me.smoe.adar.analyzer.luence.AnalyzerToy.java

Source

/**
 * Copyright (c) 2016, adar.w (adar.w@outlook.com) 
 * 
 * http://www.smoe.me
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package me.smoe.adar.analyzer.luence;

import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Small demonstration helpers that run a sentence through Lucene analyzers.
 *
 * <p>Both methods follow the {@link TokenStream} consumer workflow
 * ({@code reset()}, {@code incrementToken()} until it returns {@code false},
 * {@code end()}, {@code close()}) and release the analyzer and the stream
 * even when tokenization fails.
 */
public class AnalyzerToy {

    /**
     * Tokenizes {@code sentence} with a {@link StopAnalyzer} and prints every
     * produced term to standard output, each followed by {@code " ,"}.
     *
     * @param sentence the text to analyze; passed directly to a {@link StringReader}
     * @throws Exception if the token stream cannot be read or closed
     */
    public static void analyzerByStop(String sentence) throws Exception {
        // Resources are closed in reverse order: the token stream first, then the analyzer.
        try (Analyzer analyzer = new StopAnalyzer();
                TokenStream tokenStream = analyzer.tokenStream(StringUtils.EMPTY, new StringReader(sentence))) {
            // addAttribute returns the (possibly already registered) attribute instance,
            // so it can be fetched once instead of being looked up and cast per token.
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.print(charTermAttribute.toString() + " ,");
            }
            // Required by the TokenStream contract once the last token has been consumed.
            tokenStream.end();
        }
    }

    /**
     * Tokenizes {@code sentence} with a {@link StandardAnalyzer} and collects
     * the distinct terms.
     *
     * @param sentence the text to analyze; passed directly to a {@link StringReader}
     * @return a mutable set of the distinct terms produced by the analyzer
     *         (iteration order is unspecified)
     * @throws Exception if the token stream cannot be read or closed
     */
    public static Set<String> analyzerByStandard(String sentence) throws Exception {
        // Resources are closed in reverse order: the token stream first, then the analyzer.
        try (Analyzer analyzer = new StandardAnalyzer();
                TokenStream tokenStream = analyzer.tokenStream(StringUtils.EMPTY, new StringReader(sentence))) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

            Set<String> words = new HashSet<>();
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                // The attribute instance is reused for every token, so copy its text out.
                words.add(charTermAttribute.toString());
            }
            // Required by the TokenStream contract once the last token has been consumed.
            tokenStream.end();

            return words;
        }
    }
}