Java tutorial
/*
 * Copyright 2011-2013 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.kr.test;

import junit.framework.TestCase;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.kr.KoreanAnalyzer;
import org.apache.lucene.analysis.kr.KoreanFilter;
import org.apache.lucene.analysis.kr.utils.HanjaUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Smoke tests that run sample text through {@link KoreanAnalyzer} and
 * {@link StandardAnalyzer} and print every produced term to standard output.
 *
 * <p>The tests make no assertions; they only fail if analysis throws.
 *
 * <p>NOTE(review): the {@code ?} runs inside the sample string literals look like
 * non-ASCII (presumably Korean/Hanja) text that was lost to an encoding problem.
 * They are kept exactly as found; restore them from the original file if available.
 */
public class KoreanAnalyzerTest extends TestCase {

    /**
     * Analyzes several sample strings with {@link KoreanAnalyzer} (origin output
     * disabled via {@code setHasOrigin(false)}) and prints each term followed by the
     * elapsed time per sample.
     *
     * <p>NOTE(review): the original comment here referred to
     * {@code getPositionIncrement()} and to the values 1 and 0, but the rest of its
     * text was unreadable, so its exact meaning could not be carried over.
     *
     * @throws Exception if analysis fails
     */
    public void testKoreanTokenizer() throws Exception {
        List<String> sources = new ArrayList<>();
        sources.add("??? ??? ???");
        sources.add(" ? ? ?.");
        sources.add("?");
        sources.add(" ? . school is a good place ");
        // This sample is itself a pasted, escaped Java string expression; the embedded
        // quotes, "\\n" sequences and " +" fragments are part of the text under analysis.
        sources.add(
                " \"ASP.NET ?? ? Lifecycle, Page? Lifecycle ? ? event ? , event handler ?, .\\n\" +\n"
                        + " \"Spring MVC ? ? Controller ? Interceptor ? ??.\\n\" +\n"
                        + " \" ?...\\n\" +\n"
                        + " \"org.springframework.web.servlet.HandlerInterceptor ? org.springframework.web.servlet.handler.HandlerInterceptorAdapter ?? preHandler, postHandler, afterComletion ? ? ? .\\n\" +\n"
                        + " \"servlet.xml ? ? Interceptor ?.\\n\" +\n"
                        + " \" ?\\n\" +\n"
                        + " \" . Spring Framework 3.2.1.RELEASE Hibernate 4.1.9 Final .\\n\" +\n"
                        + " \"UnitOfWorkInterceptor ? ? Start , ? ? Close ?? . ? Hibernate ? Unit Of Work ? , ? ? ? ? Transaction ? , ?? Unit Of Work ? ?, Lifecycle ? Spring MVC ? ?? .\"");

        KoreanAnalyzer analyzer = new KoreanAnalyzer(Version.LUCENE_36);
        analyzer.setHasOrigin(false);

        for (String source : sources) {
            System.out.println("--------------------------");
            System.out.println("Analyze source : " + source);
            System.out.println("--------------------------");

            try (TokenStream stream = analyzer.tokenStream("s", new StringReader(source))) {
                long start = System.currentTimeMillis();
                printTerms(stream);
                System.out.println((System.currentTimeMillis() - start) + "ms");
            }
        }
    }

    /**
     * Analyzes a sample sentence with {@link StandardAnalyzer} wrapped in a
     * {@link StandardFilter} and prints each term followed by the elapsed time.
     *
     * @throws Exception if analysis fails
     */
    public void testStandardTokenizer() throws Exception {
        String source = "??? ??? ???";
        // Only this second value is actually analyzed; the first is overwritten.
        source = " ? . school is a good place ";

        long start = System.currentTimeMillis();

        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
        // Closing the outer filter also closes the wrapped stream.
        try (TokenStream tok = new StandardFilter(Version.LUCENE_36, stream)) {
            printTerms(tok);
        }

        System.out.println((System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Prints the results of a few {@link StringEscapeUtils} conversions.
     *
     * @throws Exception never thrown by the current body; kept for signature compatibility
     */
    public void testJavaEscape() throws Exception {
        String str = StringEscapeUtils.unescapeHtml4("믕");
        System.out.println(str);

        // NOTE(review): the original comment on this section was unreadable.
        // The unescapeJava result is overwritten immediately and never printed.
        String han = StringEscapeUtils.unescapeJava("0x3400");
        han = StringEscapeUtils.escapeJava("?");
        System.out.println(han);
    }

    /**
     * Converts each character of a sample string with
     * {@link HanjaUtils#convertToHangul(char)} and prints the resulting characters,
     * one input character per output line.
     *
     * <p>NOTE(review): the sample string is empty here, so the loop body never runs;
     * presumably it originally held Hanja characters - confirm against the original file.
     *
     * @throws Exception if conversion fails
     */
    public void testConvertHanja() throws Exception {
        String han = "";

        for (int jj = 0; jj < han.length(); jj++) {
            char[] result = HanjaUtils.convertToHangul(han.charAt(jj));
            for (char c : result) {
                System.out.print(c);
            }
            System.out.println();
        }
    }

    /**
     * Analyzes a sample string with a default {@link KoreanAnalyzer} wrapped in a
     * {@link KoreanFilter} and prints each term followed by the elapsed time.
     *
     * @throws Exception if analysis fails
     */
    public void testHanjaConvert() throws Exception {
        String source = " ??? ";

        long start = System.currentTimeMillis();

        KoreanAnalyzer analyzer = new KoreanAnalyzer();
        TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
        // Closing the outer filter also closes the wrapped stream.
        try (TokenStream tok = new KoreanFilter(stream)) {
            printTerms(tok);
        }

        System.out.println((System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Consumes {@code stream} to exhaustion and prints each term on its own line.
     *
     * <p>Only the first {@code length()} characters of the term buffer are printed;
     * the backing array may be longer than the current term and hold stale characters.
     *
     * @param stream the token stream to consume; the caller remains responsible for closing it
     * @throws IOException if the stream fails while being read
     */
    private static void printTerms(TokenStream stream) throws IOException {
        // The attribute instance is reused across tokens, so one lookup is enough.
        CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(new String(term.buffer(), 0, term.length()));
        }
        stream.end();
    }
}