// Java tutorial
/* * Copyright 2012 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.lt.n2n.annotators; import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.Comparator; import java.util.List; import jobimtext.thesaurus.contextual.type.cas.ContextualExpansion; import jobimtext.thesaurus.contextual.type.cas.ContextualExpansions; import jobimtext.thesaurus.distributional.type.cas.DistributionalExpansion; import jobimtext.thesaurus.distributional.type.cas.DistributionalExpansions; import org.apache.commons.lang.StringUtils; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.component.JCasConsumer_ImplBase; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.apache.uima.resource.ResourceInitializationException; import de.tudarmstadt.lt.utilities.ListUtils; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; /** * * @author Steffen Remus */ public class ExpansionsPrinter extends JCasConsumer_ImplBase { public static final String PARAM_TARGET_ANNOTATION_TYPE = "_target_annotation_type"; @ConfigurationParameter(name = PARAM_TARGET_ANNOTATION_TYPE, mandatory = false) private Class<? 
extends Annotation> _target_annotation_type = Token.class; // default = Token class public static final String PARAM_TARGET_PRINTSTREAM = "_printstream_as_string"; @ConfigurationParameter(name = PARAM_TARGET_PRINTSTREAM, mandatory = false) private String _printstream_as_string; private PrintStream _printstream; @Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); if (_printstream_as_string == null || "sysout".equals(_printstream_as_string) || "System.out".equals(_printstream_as_string) || "stdout".equals(_printstream_as_string)) _printstream = System.out; else if ("syserr".equals(_printstream_as_string) || "System.err".equals(_printstream_as_string) || "stderr".equals(_printstream_as_string)) _printstream = System.err; else try { _printstream = new PrintStream(_printstream_as_string); } catch (FileNotFoundException e) { throw new ResourceInitializationException(e); } } private Comparator<DistributionalExpansion> _dt_expansions_comparator_desc = new Comparator<DistributionalExpansion>() { @Override public int compare(DistributionalExpansion o1, DistributionalExpansion o2) { return Double.compare(o2.getCount(), o1.getCount()); } }; private Comparator<ContextualExpansion> _ct_expansions_comparator_desc = new Comparator<ContextualExpansion>() { @Override public int compare(ContextualExpansion o1, ContextualExpansion o2) { int res = o2.getCtScore() < o1.getCtScore() ? -1 : o2.getCtScore() > o1.getCtScore() ? 1 : 0; if (res != 0) return res; return o2.getDtScore() < o1.getDtScore() ? -1 : o2.getDtScore() > o1.getDtScore() ? 
1 : 0; } }; @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { _printstream.format("=== %s begin [%s] ===%n%n", getClass().getSimpleName(), StringUtils.abbreviate(aJCas.getDocumentText(), 30).replace("\n", "")); for (Annotation target_instance : JCasUtil.select(aJCas, _target_annotation_type)) { _printstream.format("%s%n DT:%n", target_instance.getCoveredText()); for (DistributionalExpansions expansions_for_instance_part : JCasUtil .selectCovered(DistributionalExpansions.class, target_instance)) { _printstream.format(" %-20.20s%n", expansions_for_instance_part.getCoveredText()); List<DistributionalExpansion> dt_expansions = JCasUtil.selectCovered(DistributionalExpansion.class, expansions_for_instance_part); int[] sorted_ids = ListUtils.sortIdsByValue(dt_expansions, _dt_expansions_comparator_desc); // sort expansions by dt value entry for (int i = 0; i < sorted_ids.length; i++) { DistributionalExpansion expansion_for_instance_part = dt_expansions.get(sorted_ids[i]); _printstream.format(" %-30.30s %.3f%n", expansion_for_instance_part.getKey(), expansion_for_instance_part.getCount()); } } _printstream.format(" CT:%n", target_instance.getCoveredText()); for (ContextualExpansions expansions_for_instance_part : JCasUtil .selectCovered(ContextualExpansions.class, target_instance)) { _printstream.format(" %-20.20s%n", expansions_for_instance_part.getCoveredText()); List<ContextualExpansion> ct_expansions = JCasUtil.selectCovered(ContextualExpansion.class, expansions_for_instance_part); int[] sorted_ids = ListUtils.sortIdsByValue(ct_expansions, _ct_expansions_comparator_desc); // sort expansions by ct and dt value entries for (int i = 0; i < sorted_ids.length; i++) { ContextualExpansion expansion_for_instance_part = ct_expansions.get(sorted_ids[i]); _printstream.format(" %-30.30s %6.3e (%.3f)%n", expansion_for_instance_part.getKey(), expansion_for_instance_part.getCtScore(), expansion_for_instance_part.getDtScore()); } } 
_printstream.println(); } _printstream.format("=== %s begin ===%n%n", getClass().getSimpleName()); } }