de.tudarmstadt.lt.n2n.annotators.ExpansionsPrinter.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.lt.n2n.annotators.ExpansionsPrinter.java

Source

/*
 *   Copyright 2012
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package de.tudarmstadt.lt.n2n.annotators;

import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Comparator;
import java.util.List;

import jobimtext.thesaurus.contextual.type.cas.ContextualExpansion;
import jobimtext.thesaurus.contextual.type.cas.ContextualExpansions;
import jobimtext.thesaurus.distributional.type.cas.DistributionalExpansion;
import jobimtext.thesaurus.distributional.type.cas.DistributionalExpansions;

import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasConsumer_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;

import de.tudarmstadt.lt.utilities.ListUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;

/**
 * UIMA consumer that writes a human-readable dump of thesaurus expansions to a print stream.
 *
 * <p>
 * For every annotation of the configured target type (default: {@link Token}) found in the CAS, the
 * {@link DistributionalExpansions} ("DT") and {@link ContextualExpansions} ("CT") annotations covered by it are
 * listed together with their individual expansion entries and scores.
 *
 * <p>
 * The output destination is selected with {@link #PARAM_TARGET_PRINTSTREAM}: {@code sysout}, {@code System.out},
 * {@code stdout} or an unset value select standard output; {@code syserr}, {@code System.err} or {@code stderr}
 * select standard error; any other value is treated as the name of a file to write to.
 *
 * @author Steffen Remus
 */
public class ExpansionsPrinter extends JCasConsumer_ImplBase {

    /** Name of the parameter holding the annotation type whose instances are printed. */
    public static final String PARAM_TARGET_ANNOTATION_TYPE = "_target_annotation_type";
    @ConfigurationParameter(name = PARAM_TARGET_ANNOTATION_TYPE, mandatory = false)
    private Class<? extends Annotation> _target_annotation_type = Token.class; // default = Token class

    /** Name of the parameter selecting the output destination (stdout/stderr alias or a file name). */
    public static final String PARAM_TARGET_PRINTSTREAM = "_printstream_as_string";
    @ConfigurationParameter(name = PARAM_TARGET_PRINTSTREAM, mandatory = false)
    private String _printstream_as_string;
    private PrintStream _printstream;

    /**
     * Resolves the configured destination string to the print stream used by {@link #process(JCas)}.
     *
     * @param context
     *            the UIMA context passed on to the super class
     * @throws ResourceInitializationException
     *             if the super class fails to initialize or the destination file cannot be opened
     */
    @Override
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);

        if (_printstream_as_string == null || "sysout".equals(_printstream_as_string)
                || "System.out".equals(_printstream_as_string) || "stdout".equals(_printstream_as_string)) {
            _printstream = System.out;
        } else if ("syserr".equals(_printstream_as_string) || "System.err".equals(_printstream_as_string)
                || "stderr".equals(_printstream_as_string)) {
            _printstream = System.err;
        } else {
            // anything else is interpreted as a file name
            try {
                _printstream = new PrintStream(_printstream_as_string);
            } catch (FileNotFoundException e) {
                throw new ResourceInitializationException(e);
            }
        }
    }

    /**
     * Releases the output stream. A stream that was opened on a file is closed; the shared process-wide streams
     * {@code System.out} and {@code System.err} are only flushed and never closed.
     */
    @Override
    public void destroy() {
        if (_printstream != null) {
            if (_printstream == System.out || _printstream == System.err) {
                _printstream.flush();
            } else {
                _printstream.close();
            }
        }
        super.destroy();
    }

    /** Orders distributional expansions by their count, highest count first. */
    private final Comparator<DistributionalExpansion> _dt_expansions_comparator_desc = new Comparator<DistributionalExpansion>() {
        @Override
        public int compare(DistributionalExpansion o1, DistributionalExpansion o2) {
            return Double.compare(o2.getCount(), o1.getCount());
        }
    };

    /**
     * Orders contextual expansions by their contextual score, highest first; ties are broken by the distributional
     * score, highest first. {@link Double#compare(double, double)} is used so that the ordering stays consistent
     * even for NaN scores.
     */
    private final Comparator<ContextualExpansion> _ct_expansions_comparator_desc = new Comparator<ContextualExpansion>() {
        @Override
        public int compare(ContextualExpansion o1, ContextualExpansion o2) {
            int res = Double.compare(o2.getCtScore(), o1.getCtScore());
            if (res != 0) {
                return res;
            }
            return Double.compare(o2.getDtScore(), o1.getDtScore());
        }
    };

    /**
     * Prints a header containing an abbreviated preview of the document text, then one section per target
     * annotation with its DT and CT expansions, and finally a footer line.
     *
     * @param aJCas
     *            the CAS whose expansions are printed
     * @throws AnalysisEngineProcessException
     *             declared by the overridden method; not thrown directly by this implementation
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        // a CAS without document text yields an empty preview instead of a NullPointerException
        String document_text = aJCas.getDocumentText();
        String preview = document_text == null ? ""
                : StringUtils.abbreviate(document_text, 30).replace("\n", "");
        _printstream.format("=== %s begin [%s] ===%n%n", getClass().getSimpleName(), preview);

        for (Annotation target_instance : JCasUtil.select(aJCas, _target_annotation_type)) {
            _printstream.format("%s%n  DT:%n", target_instance.getCoveredText());
            printDistributionalExpansions(target_instance);

            _printstream.format("  CT:%n");
            printContextualExpansions(target_instance);

            _printstream.println();
        }

        _printstream.format("=== %s end ===%n%n", getClass().getSimpleName());
        _printstream.flush();
    }

    /** Prints every distributional expansion group covered by the given annotation, entries ordered by the DT comparator. */
    private void printDistributionalExpansions(Annotation target_instance) {
        for (DistributionalExpansions expansions_for_instance_part : JCasUtil
                .selectCovered(DistributionalExpansions.class, target_instance)) {
            _printstream.format("    %-20.20s%n", expansions_for_instance_part.getCoveredText());
            List<DistributionalExpansion> dt_expansions = JCasUtil.selectCovered(DistributionalExpansion.class,
                    expansions_for_instance_part);
            // sort expansions by dt value entry
            int[] sorted_ids = ListUtils.sortIdsByValue(dt_expansions, _dt_expansions_comparator_desc);
            for (int sorted_id : sorted_ids) {
                DistributionalExpansion expansion = dt_expansions.get(sorted_id);
                _printstream.format("      %-30.30s %.3f%n", expansion.getKey(), expansion.getCount());
            }
        }
    }

    /** Prints every contextual expansion group covered by the given annotation, entries ordered by the CT comparator. */
    private void printContextualExpansions(Annotation target_instance) {
        for (ContextualExpansions expansions_for_instance_part : JCasUtil
                .selectCovered(ContextualExpansions.class, target_instance)) {
            _printstream.format("    %-20.20s%n", expansions_for_instance_part.getCoveredText());
            List<ContextualExpansion> ct_expansions = JCasUtil.selectCovered(ContextualExpansion.class,
                    expansions_for_instance_part);
            // sort expansions by ct and dt value entries
            int[] sorted_ids = ListUtils.sortIdsByValue(ct_expansions, _ct_expansions_comparator_desc);
            for (int sorted_id : sorted_ids) {
                ContextualExpansion expansion = ct_expansions.get(sorted_id);
                _printstream.format("      %-30.30s %6.3e (%.3f)%n", expansion.getKey(), expansion.getCtScore(),
                        expansion.getDtScore());
            }
        }
    }
}