de.tudarmstadt.ukp.csniper.ml.NeAugmentationAnnotator.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.csniper.ml.NeAugmentationAnnotator.java

Source

/*******************************************************************************
 * Copyright 2013
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.csniper.ml;

import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.util.JCasUtil;

import de.tudarmstadt.ukp.csniper.webapp.search.tgrep.PennTreeNode;
import de.tudarmstadt.ukp.csniper.webapp.search.tgrep.PennTreeUtils;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree;

/**
 * UIMA annotator that augments the bracketed parse tree stored in each {@link PennTree}
 * annotation with the {@link NamedEntity} annotations covered by that tree.
 * <p>
 * For every {@link PennTree} in the CAS, the tree string is whitespace-normalized, parsed into a
 * {@link PennTreeNode}, and one node per covered named entity (labelled with the entity's value)
 * is inserted over the tokens the entity spans. If at least one entity could be inserted, the
 * serialized augmented tree is written back into the {@link PennTree} annotation.
 */
public class NeAugmentationAnnotator extends JCasAnnotator_ImplBase {

    /**
     * Augments all {@link PennTree} annotations of the given CAS with their covered named
     * entities. Trees that cannot be parsed (parser returns {@code null}) or into which no entity
     * could be inserted are left untouched.
     *
     * @param aJCas the CAS to process
     * @throws AnalysisEngineProcessException declared by the overridden method; not thrown
     *         explicitly here
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        for (PennTree pt : JCasUtil.select(aJCas, PennTree.class)) {
            String tree = StringUtils.normalizeSpace(pt.getPennTree());
            PennTreeNode ptn = PennTreeUtils.parsePennTree(tree);
            if (ptn == null) {
                continue;
            }

            int augmented = augment(aJCas, pt, ptn, JCasUtil.selectCovered(NamedEntity.class, pt));
            if (augmented > 0) {
                getLogger().info("ORIGINAL PENNTREE:  [" + tree + "]");
                tree = PennTreeUtils.toPennTree(ptn);
                pt.setPennTree(tree);
                getLogger().info("AUGMENTED PENNTREE: [" + tree + "]");
            }
        }
    }

    /**
     * Inserts one node per named entity into the given parsed tree.
     * <p>
     * Token indices are computed relative to the start of {@code aPennTree}, not the start of the
     * document, so that trees which do not begin at document offset 0 (e.g. the second sentence
     * of a multi-sentence CAS) are indexed correctly. This assumes that
     * {@code PennTreeNode.insertIndexedNode} counts tokens from the first leaf of the parsed tree
     * (NOTE(review): confirm against PennTreeNode).
     *
     * @param aJCas the CAS that holds the tokens
     * @param aPennTree the annotation the parsed tree was read from; its begin offset is the
     *        origin for token indexing
     * @param aPennTreeNode the parsed tree to modify in place
     * @param sNes the named entities covered by {@code aPennTree}
     * @return the number of named entities that were successfully inserted
     */
    private int augment(JCas aJCas, PennTree aPennTree, PennTreeNode aPennTreeNode,
            List<NamedEntity> sNes) {
        int count = 0;
        for (NamedEntity ne : sNes) {
            int coveredTokens = JCasUtil.selectCovered(Token.class, ne).size();
            if (coveredTokens == 0) {
                // Without any covered token the end index would precede the start index,
                // so there is no token span the new node could be placed over.
                getLogger().warn("Can't insert NE [" + ne + "] into PennTree ["
                        + PennTreeUtils.toPennTree(aPennTreeNode) + "], because it covers no tokens.");
                continue;
            }

            // Number of tokens between the start of this tree and the start of the entity
            // equals the zero-based index of the entity's first token within the tree.
            int tokenStartIndex = JCasUtil
                    .selectCovered(aJCas, Token.class, aPennTree.getBegin(), ne.getBegin()).size();
            int tokenEndIndex = tokenStartIndex + coveredTokens - 1;
            try {
                aPennTreeNode.insertIndexedNode(tokenStartIndex, tokenEndIndex, ne.getValue());
                count++;
            } catch (IllegalArgumentException e) {
                // Best effort: an entity that cannot be inserted is reported and skipped,
                // the remaining entities are still processed.
                getLogger().warn("Can't insert NE [" + ne + "] into PennTree ["
                        + PennTreeUtils.toPennTree(aPennTreeNode) + "], because it crosses constituent borders.",
                        e);
            }
        }
        return count;
    }
}