Java tutorial
/* * Copyright 2011-2013 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis.kr.test; import junit.framework.TestCase; import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.FileUtils; import org.apache.lucene.analysis.kr.morph.*; import org.apache.lucene.analysis.kr.utils.DictionaryUtil; import org.junit.Ignore; import org.junit.Test; import java.io.File; import java.util.*; @Slf4j public class MorphAnalyzerTest extends TestCase { @Test public void testMorphAnalyzer() throws Exception { String[] inputs = new String[] { "?", "?", "?", "", "??", "????", "??", "?", "", "", "?", "", "", "?", "", "", "?", "?", "?", "?", "", "?", "?", "??" // ?? ??. , "", "", "", "?", "", "??", "?", "", "?", "??", "???", "4.19?", "x", "", "?" }; MorphAnalyzer analyzer = new MorphAnalyzer(); long start = 0; for (String input : inputs) { List<AnalysisOutput> list = analyzer.analyze(input); for (AnalysisOutput o : list) { System.out.print(o.toString() + "->"); for (int i = 0; i < o.getCNounList().size(); i++) { System.out.print(o.getCNounList().get(i).getWord() + "/"); } System.out.print(o.getPatn()); System.out.println("<" + o.getScore() + ">"); } if (start == 0) start = System.currentTimeMillis(); } System.out.println((System.currentTimeMillis() - start) + "ms"); } @Test public void testCloneAnalysisOutput() throws Exception { AnalysisOutput output = new AnalysisOutput(); output.setStem("aaaa"); AnalysisOutput clone = output.clone(); assertEquals("aaaa", clone.getStem()); System.out.println(clone.getStem()); } @Test public void testMorphAnalyzerManager() throws Exception { String input = " ?? "; MorphAnalyzerManager manager = new MorphAnalyzerManager(); manager.analyze(input); } @Test public void testAlphaNumeric() throws Exception { String str = "0123456789azAZ"; for (int i = 0; i < str.length(); i++) { System.out.println(str.charAt(i) + ":" + (str.charAt(i) - 0)); } } @Test public void testGetWordEntry() throws Exception { String s = ""; WordEntry we = DictionaryUtil.getCNoun(s); System.out.println(we.getWord()); } /** * ? ? ? ? * * @throws Exception */ @Test @Ignore(" ?? ?? .") public void yongonAnalysis() throws Exception { String fname = "data/_?.txt"; List<String> list = FileUtils.readLines(new File(fname)); Map<String, String> younons = new HashMap(); MorphAnalyzer analyzer = new MorphAnalyzer(); long start = 0; List youngOutputs = new ArrayList(); for (String input : list) { if (!input.endsWith("") && !input.endsWith("?")) { youngOutputs.add(input); continue; } String eogan = input.substring(0, input.length() - 2); List<AnalysisOutput> outputs = analyzer.analyze(input); AnalysisOutput o = outputs.get(0); String result = o.toString() + "->"; for (int i = 0; i < o.getCNounList().size(); i++) { result += o.getCNounList().get(i).getWord() + "/"; } result += "<" + o.getScore() + ">"; String tmp = younons.get(eogan); if (tmp == null) { younons.put(eogan, result); } else { younons.put(eogan, tmp + "| " + result); } } fname = "data/_?.txt"; String cheonOutfile = "data/cheon.txt"; String youngOutfile = "data/youngon.txt"; List<String> cheons = FileUtils.readLines(new File(fname)); List<String> outputs = new ArrayList(); System.out.println(younons.size()); for (String cheon : cheons) { String str = younons.remove(cheon); if (str != null) { cheon += "=> " + str; // younons.remove(cheon); } outputs.add(cheon); } Iterator<String> iter = younons.keySet().iterator(); while (iter.hasNext()) { String key = iter.next(); outputs.add(key + "=> " + younons.get(key)); } Collections.sort(outputs); Collections.sort(youngOutputs); FileUtils.writeLines(new File(cheonOutfile), outputs); FileUtils.writeLines(new File(youngOutfile), youngOutputs); outputs.addAll(youngOutputs); Collections.sort(outputs); FileUtils.writeLines(new File("data/all.txt"), outputs); } @Test public void testCompoundNounsWithinDic() throws Exception { String input = "?"; WordEntry cnoun = DictionaryUtil.getCNoun(input); List<CompoundEntry> list = null; if (cnoun != null && cnoun.getFeature(WordEntry.IDX_NOUN) == '2') { list = cnoun.getCompounds(); for (int j = 0; j < list.size(); j++) { System.out.println(list.get(j).getWord()); } } } @Test public void testCompoundNouns() throws Exception { String input = "?"; CompoundNounAnalyzer cnAnalyzer = new CompoundNounAnalyzer(); cnAnalyzer.setExactMach(true); List<CompoundEntry> list = cnAnalyzer.analyze(input); if (list == null) return; for (CompoundEntry entry : list) { System.out.println(entry.getWord()); } } }