org.apache.lucene.analysis.kr.test.MorphAnalyzerTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.analysis.kr.test.MorphAnalyzerTest.java

Source

/*
 * Copyright 2011-2013 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.kr.test;

import junit.framework.TestCase;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.kr.morph.*;
import org.apache.lucene.analysis.kr.utils.DictionaryUtil;
import org.junit.Ignore;
import org.junit.Test;

import java.io.File;
import java.util.*;

/**
 * Exploratory tests for the Korean morphological analysis classes
 * ({@code MorphAnalyzer}, {@code MorphAnalyzerManager}, {@code CompoundNounAnalyzer},
 * {@code DictionaryUtil}).
 *
 * <p>Most methods only print analysis results to standard output;
 * {@link #testCloneAnalysisOutput()} is the only one with a value assertion.
 *
 * <p>NOTE(review): many string literals in this file consist of {@code ?} characters or are
 * empty. They look like Korean text that was lost to a character-encoding problem. They are
 * kept byte-for-byte because they are runtime data; restore them from the original source
 * if it is available.
 *
 * <p>NOTE(review): the class extends JUnit 3's {@code TestCase} but also carries JUnit 4
 * annotations. Verify which runner is in effect; under JUnit 3 semantics only
 * {@code public void test*()} methods are picked up and the annotations have no effect.
 */
@Slf4j
public class MorphAnalyzerTest extends TestCase {

    /**
     * Runs {@code MorphAnalyzer.analyze} over a fixed list of inputs and prints, for every
     * analysis candidate: its string form, its compound-noun words, its pattern and its score.
     * Finally prints an elapsed time in milliseconds.
     *
     * @throws Exception if the analyzer fails
     */
    @Test
    public void testMorphAnalyzer() throws Exception {

        String[] inputs = new String[] { "?", "?", "?", "",
                "??", "????", "??", "?", "",
                "", "?", "", "", "?", "",
                "", "?", "?", "?", "?",
                "", "?", "?", "??" // (original inline comment unreadable: mis-encoded)
                , "", "", "", "?", "", "??",
                "?", "", "?", "??", "???",
                "4.19?", "x", "", "?" };

        MorphAnalyzer analyzer = new MorphAnalyzer();
        long start = 0;
        for (String input : inputs) {
            List<AnalysisOutput> candidates = analyzer.analyze(input);
            for (AnalysisOutput candidate : candidates) {
                System.out.print(candidate.toString() + "->");
                System.out.print(joinCompoundWords(candidate));
                System.out.print(candidate.getPatn());
                System.out.println("<" + candidate.getScore() + ">");
            }
            // The clock is started only after the first input has been processed, so the
            // reported time excludes that first call (presumably to leave out one-time
            // dictionary loading -- TODO confirm).
            if (start == 0) {
                start = System.currentTimeMillis();
            }
        }
        System.out.println((System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Checks that a clone of an {@code AnalysisOutput} carries the same stem as the original.
     *
     * @throws Exception if cloning fails
     */
    @Test
    public void testCloneAnalysisOutput() throws Exception {
        AnalysisOutput output = new AnalysisOutput();
        output.setStem("aaaa");

        AnalysisOutput clone = output.clone();

        assertEquals("aaaa", clone.getStem());
        System.out.println(clone.getStem());
    }

    /**
     * Smoke test: calls {@code MorphAnalyzerManager.analyze} on a single input and only
     * verifies that no exception is thrown.
     *
     * @throws Exception if the manager fails
     */
    @Test
    public void testMorphAnalyzerManager() throws Exception {
        String input = " ?? ";

        MorphAnalyzerManager manager = new MorphAnalyzerManager();
        manager.analyze(input);
    }

    /**
     * Prints each character of a digit/letter sample together with its numeric char code.
     *
     * @throws Exception never thrown by the current body; kept for signature compatibility
     */
    @Test
    public void testAlphaNumeric() throws Exception {
        String str = "0123456789azAZ";
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            System.out.println(c + ":" + (int) c);
        }
    }

    /**
     * Looks up a word through {@code DictionaryUtil.getCNoun} and prints the entry's word.
     *
     * @throws Exception if the dictionary lookup fails
     */
    @Test
    public void testGetWordEntry() throws Exception {
        String s = "";
        WordEntry we = DictionaryUtil.getCNoun(s);
        // testCompoundNounsWithinDic treats a null result as possible; fail with a clear
        // message here instead of a bare NullPointerException on the next line.
        assertNotNull("DictionaryUtil.getCNoun returned null for \"" + s + "\"", we);
        System.out.println(we.getWord());
    }

    /**
     * Analyzes every line of an input word list, groups the results by the line with its last
     * two characters removed, merges them with a second word list and writes three sorted
     * report files: {@code data/cheon.txt}, {@code data/youngon.txt} and {@code data/all.txt}.
     *
     * <p>NOTE(review): the original Javadoc, the {@code @Ignore} reason, both input file names
     * and both suffix literals are mis-encoded. In particular the two input paths are now the
     * same string, and {@code endsWith("")} is true for every line, so as written no line is
     * ever classified as "non-matching" by the suffix test.
     *
     * @throws Exception if reading, analyzing or writing fails
     */
    @Test
    @Ignore(" ?? ?? .")
    public void yongonAnalysis() throws Exception {

        String fname = "data/_?.txt";

        List<String> lines = FileUtils.readLines(new File(fname));
        Map<String, String> younons = new HashMap<String, String>();

        MorphAnalyzer analyzer = new MorphAnalyzer();
        List<String> youngOutputs = new ArrayList<String>();
        for (String input : lines) {

            if (!input.endsWith("") && !input.endsWith("?")) {
                youngOutputs.add(input);
                continue;
            }
            // A line shorter than two characters cannot be split into stem + two-character
            // ending; substring() below would throw. Treat it like a non-matching line.
            if (input.length() < 2) {
                youngOutputs.add(input);
                continue;
            }
            String eogan = input.substring(0, input.length() - 2);

            // Only the first analysis candidate is reported.
            List<AnalysisOutput> candidates = analyzer.analyze(input);
            AnalysisOutput best = candidates.get(0);
            String result = best.toString() + "->" + joinCompoundWords(best)
                    + "<" + best.getScore() + ">";

            // Several input lines may share a stem; their results are joined with "| ".
            String previous = younons.get(eogan);
            if (previous == null) {
                younons.put(eogan, result);
            } else {
                younons.put(eogan, previous + "| " + result);
            }
        }

        fname = "data/_?.txt";
        String cheonOutfile = "data/cheon.txt";
        String youngOutfile = "data/youngon.txt";

        List<String> cheons = FileUtils.readLines(new File(fname));
        List<String> outputs = new ArrayList<String>();
        System.out.println(younons.size());
        for (String cheon : cheons) {
            // remove() both fetches the analysis and marks the stem as consumed, so the
            // loop below only emits stems that were absent from the second list.
            String analysis = younons.remove(cheon);
            String line = cheon;
            if (analysis != null) {
                line += "=> " + analysis;
            }
            outputs.add(line);
        }

        for (Map.Entry<String, String> leftover : younons.entrySet()) {
            outputs.add(leftover.getKey() + "=> " + leftover.getValue());
        }

        Collections.sort(outputs);
        Collections.sort(youngOutputs);

        FileUtils.writeLines(new File(cheonOutfile), outputs);
        FileUtils.writeLines(new File(youngOutfile), youngOutputs);

        outputs.addAll(youngOutputs);
        Collections.sort(outputs);
        FileUtils.writeLines(new File("data/all.txt"), outputs);
    }

    /**
     * Looks up a word through {@code DictionaryUtil.getCNoun} and, when an entry exists whose
     * {@code WordEntry.IDX_NOUN} feature is {@code '2'}, prints the words of its compounds.
     *
     * @throws Exception if the dictionary lookup fails
     */
    @Test
    public void testCompoundNounsWithinDic() throws Exception {

        String input = "?";

        WordEntry cnoun = DictionaryUtil.getCNoun(input);
        if (cnoun == null || cnoun.getFeature(WordEntry.IDX_NOUN) != '2') {
            return;
        }

        List<CompoundEntry> compounds = cnoun.getCompounds();
        for (CompoundEntry entry : compounds) {
            System.out.println(entry.getWord());
        }
    }

    /**
     * Runs {@code CompoundNounAnalyzer} in exact-match mode on a single input and prints the
     * word of every returned entry; does nothing when the analyzer returns {@code null}.
     *
     * @throws Exception if the analyzer fails
     */
    @Test
    public void testCompoundNouns() throws Exception {

        String input = "?";
        CompoundNounAnalyzer cnAnalyzer = new CompoundNounAnalyzer();
        cnAnalyzer.setExactMach(true);

        List<CompoundEntry> entries = cnAnalyzer.analyze(input);
        if (entries == null) {
            return;
        }

        for (CompoundEntry entry : entries) {
            System.out.println(entry.getWord());
        }
    }

    /**
     * Concatenates the words of an analysis result's compound-noun list, each followed by
     * {@code "/"}.
     *
     * @param output the analysis result to read the compound-noun list from
     * @return the joined words, or an empty string when the list is empty
     */
    private static String joinCompoundWords(AnalysisOutput output) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < output.getCNounList().size(); i++) {
            joined.append(output.getCNounList().get(i).getWord()).append("/");
        }
        return joined.toString();
    }
}