com.textocat.textokit.commons.annotator.AnnotationRemover.java Source code

Java tutorial

Introduction

Here is the source code for com.textocat.textokit.commons.annotator.AnnotationRemover.java

Source

/*
 *    Copyright 2015 Textocat
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package com.textocat.textokit.commons.annotator;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.textocat.textokit.commons.cas.FSTypeUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.component.CasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

import static com.textocat.textokit.commons.util.AnnotatorUtils.annotationTypeExist;

/**
 * Annotator that removes annotations of configured types from the indexes of a CAS.
 * <p>
 * The set of types to remove is derived in {@link #typeSystemInit(TypeSystem)} from two optional
 * configuration parameters:
 * <ul>
 * <li>{@link #PARAM_NAMESPACES_TO_REMOVE} - selects every annotation type for which
 * {@code FSTypeUtils.getNamespaces} reports at least one of the given namespaces;</li>
 * <li>{@link #PARAM_TYPES_TO_REMOVE} - selects the annotation types with the given names.</li>
 * </ul>
 *
 * @author Rinat Gareev
 */
public class AnnotationRemover extends CasAnnotator_ImplBase {

    public static final String PARAM_NAMESPACES_TO_REMOVE = "NamespacesToRemove";
    public static final String PARAM_TYPES_TO_REMOVE = "TypesToRemove";

    @ConfigurationParameter(name = PARAM_NAMESPACES_TO_REMOVE, mandatory = false)
    private String[] namespacesToRemove;
    @ConfigurationParameter(name = PARAM_TYPES_TO_REMOVE, mandatory = false)
    private String[] typeNamesToRemove;
    // derived config
    private Set<Type> typesToRemove;
    private Type annotationType;

    /**
     * Resolves the configured namespaces and type names against the given type system and
     * stores the resulting types as an immutable set.
     *
     * @param ts type system to resolve the configured names against
     * @throws AnalysisEngineProcessException as declared by the overridden method
     */
    @Override
    public void typeSystemInit(TypeSystem ts) throws AnalysisEngineProcessException {
        super.typeSystemInit(ts);
        annotationType = ts.getType("uima.tcas.Annotation");
        annotationTypeExist("uima.tcas.Annotation", annotationType);
        // accumulate into a mutable set first; it is frozen at the end of this method
        typesToRemove = Sets.newHashSet();
        collectTypesByNamespace(ts);
        collectTypesByName(ts);
        reportSelectedTypes();
        typesToRemove = ImmutableSet.copyOf(typesToRemove);
    }

    /**
     * Adds every annotation type of the type system that shares at least one namespace with
     * the configured ones. Does nothing when no namespaces are configured.
     */
    private void collectTypesByNamespace(TypeSystem ts) {
        if (namespacesToRemove == null) {
            return;
        }
        Set<String> requestedNamespaces = Sets.newHashSet(namespacesToRemove);
        for (Iterator<Type> it = ts.getTypeIterator(); it.hasNext(); ) {
            Type candidate = it.next();
            if (!isAnnotationType(candidate, ts)) {
                continue;
            }
            Set<String> candidateNamespaces = FSTypeUtils.getNamespaces(candidate);
            boolean noCommonNamespace =
                    Sets.intersection(candidateNamespaces, requestedNamespaces).isEmpty();
            if (!noCommonNamespace) {
                typesToRemove.add(candidate);
            }
        }
    }

    /**
     * Adds the explicitly configured types. Each name is looked up in the type system and
     * checked with {@code annotationTypeExist}; a type that is not an annotation type is
     * skipped with a warning. Does nothing when no type names are configured.
     */
    private void collectTypesByName(TypeSystem ts) throws AnalysisEngineProcessException {
        if (typeNamesToRemove == null) {
            return;
        }
        for (String typeName : typeNamesToRemove) {
            Type resolved = ts.getType(typeName);
            annotationTypeExist(typeName, resolved);
            if (!isAnnotationType(resolved, ts)) {
                getLogger().warn(String.format("%s is not annotation type", resolved));
                continue;
            }
            typesToRemove.add(resolved);
        }
    }

    /**
     * Logs the outcome of the configuration: a warning when nothing was selected, otherwise
     * (at info level) the list of selected types, one per line.
     */
    private void reportSelectedTypes() {
        if (typesToRemove.isEmpty()) {
            getLogger().warn("Configuration of AnnotationRemover yields empty set of types to remove.");
            return;
        }
        if (!getLogger().isInfoEnabled()) {
            return;
        }
        String typeList = Joiner.on('\n').join(typesToRemove);
        getLogger().info("Annotations of the following types will be removed from CAS:\n" + typeList);
    }

    /**
     * Tells whether {@code t} is subsumed by the {@code uima.tcas.Annotation} type
     * in the given type system.
     */
    private boolean isAnnotationType(Type t, TypeSystem ts) {
        return ts.subsumes(annotationType, t);
    }

    /**
     * Removes the annotations of every selected type from the indexes of the given CAS.
     *
     * @param cas CAS to clean up
     * @throws AnalysisEngineProcessException as declared by the overridden method
     */
    @Override
    public void process(CAS cas) throws AnalysisEngineProcessException {
        for (Type selectedType : typesToRemove) {
            removeAnnotations(cas, selectedType);
        }
    }

    /**
     * Removes from the CAS indexes all annotations returned by the annotation index of type
     * {@code t}, and reports their number at debug level.
     */
    private void removeAnnotations(CAS cas, Type t) {
        // copy the index content first so that the index is not modified while it is iterated
        LinkedList<AnnotationFS> snapshot = Lists.newLinkedList(cas.getAnnotationIndex(t));
        int removedCount = snapshot.size();
        Iterator<AnnotationFS> pending = snapshot.iterator();
        while (pending.hasNext()) {
            cas.removeFsFromIndexes(pending.next());
        }
        if (getLogger().isDebugEnabled()) {
            getLogger().debug(String.format("%s annotations of type %s have been removed", removedCount, t));
        }
    }
}