com.tom.deleteme.PrintAnnotations.java Source code

Java tutorial

Introduction

Here is the source code for com.tom.deleteme.PrintAnnotations.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.tom.deleteme;

import java.io.PrintStream;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.text.AnnotationFS;
import org.json.simple.JSONObject;

/**
 * A simple example of how to extract information from the CAS. This example retrieves all
 * annotations of a specified type from a CAS and prints them (along with all of their features) to
 * a PrintStream.
 * 
 * 
 */
public class PrintAnnotations {

    /** JSON key (and annotation type name) under which repeated tags are accumulated as a comma-separated string. */
    public static final String OtherNamedTagsKey = "OtherNamedTags";
    /** Type name of the document-level annotation, which is never copied into the JSON result. */
    public static final String tcasAnnotationKey = "uima.tcas.DocumentAnnotation";
    /** JSON key (and annotation type name) whose value is stored as a {@code Float}. */
    public static final String WorkExperienceKey = "WorkExperience";
    /** JSON key (and annotation type name) whose value is reduced to a six-digit run. */
    public static final String PinCodeKey = "PinCode";

    /**
     * Decimal number such as {@code 5}, {@code 3.5} or {@code .5}. The dot is escaped on purpose: an
     * unescaped {@code .} would also capture an arbitrary leading character (e.g. {@code ":5"}), which
     * {@link Float#parseFloat(String)} cannot parse.
     */
    private static final String WORK_EXPERIENCE_REGEX = "\\d*\\.?\\d+";

    /** Run of exactly six digits. */
    private static final String PIN_CODE_REGEX = "[\\d]{6}";

    /**
     * Walks every annotation in the CAS annotation index and collects their covered text into a JSON
     * object keyed by annotation type name.
     * 
     * @param aCAS
     *          the CAS containing the FeatureStructures to read
     * @param aOut
     *          a PrintStream; it is only passed through to
     *          {@link #printAnnotatedText(FeatureStructure, CAS, int, PrintStream, JSONObject)}, which
     *          does not write to it
     * @return the JSON object populated from the annotations (empty when the index has none)
     */
    public static JSONObject printAnnotations(CAS aCAS, PrintStream aOut) {
        JSONObject json = new JSONObject();
        // Raw FSIterator is kept as in the original so the code compiles regardless of how the
        // installed UIMA version parameterizes the annotation index.
        FSIterator iter = aCAS.getAnnotationIndex().iterator();
        while (iter.isValid()) {
            FeatureStructure fs = iter.get();
            printAnnotatedText(fs, aCAS, 0, aOut, json);
            iter.moveToNext();
        }
        return json;
    }

    /**
     * Records the covered text of a single annotation in {@code json}, keyed by the annotation's type
     * name.
     * <ul>
     * <li>Feature structures that are not {@link AnnotationFS}, and annotations of type
     * {@link #tcasAnnotationKey}, are ignored.</li>
     * <li>The first annotation seen for a type wins; later ones are ignored, except for
     * {@link #OtherNamedTagsKey}, whose values are appended comma-separated.</li>
     * <li>{@link #WorkExperienceKey} is reduced to its last decimal number and stored as a
     * {@code Float}; if the text contains no number the entry is skipped.</li>
     * <li>{@link #PinCodeKey} is reduced to its last six-digit run, when one exists.</li>
     * <li>Annotations whose covered text is {@code null} are skipped.</li>
     * </ul>
     * 
     * @param aFS
     *          the feature structure to record
     * @param aCAS
     *          the CAS the feature structure belongs to (not used by this method)
     * @param aNestingLevel
     *          nesting depth (not used by this method)
     * @param aOut
     *          a PrintStream (not used by this method)
     * @param json
     *          the JSON object to update in place
     */
    // JSONObject.put is an unchecked call because json-simple's JSONObject is not generic.
    @SuppressWarnings("unchecked")
    public static void printAnnotatedText(FeatureStructure aFS, CAS aCAS, int aNestingLevel, PrintStream aOut,
            JSONObject json) {
        if (!(aFS instanceof AnnotationFS)) {
            return;
        }
        String annotationKey = aFS.getType().getName();
        if (annotationKey.equals(tcasAnnotationKey)) {
            return;
        }

        boolean alreadyRecorded = json.get(annotationKey) != null;
        if (alreadyRecorded && !annotationKey.equals(OtherNamedTagsKey)) {
            // First value wins for every type except the accumulating OtherNamedTags.
            return;
        }

        String coveredText = ((AnnotationFS) aFS).getCoveredText();
        if (coveredText == null) {
            // Nothing to record; also avoids a NullPointerException in the regex/number handling below.
            return;
        }

        if (alreadyRecorded) {
            appendOtherNamedTags(json, processString(coveredText));
            return;
        }

        if (annotationKey.equals(WorkExperienceKey)) {
            String number = processString(stringCleanUp(coveredText, WORK_EXPERIENCE_REGEX));
            try {
                json.put(annotationKey, Float.parseFloat(number));
            } catch (NumberFormatException ignored) {
                // The covered text held no number. Skip it rather than abort the whole extraction;
                // the key stays absent so a later WorkExperience annotation can still supply a value.
            }
        } else if (annotationKey.equals(PinCodeKey)) {
            json.put(annotationKey, processString(stringCleanUp(coveredText, PIN_CODE_REGEX)));
        } else {
            json.put(annotationKey, processString(coveredText));
        }
    }

    /**
     * Removes every line-feed and carriage-return character and trims surrounding whitespace.
     * 
     * @param coveredText
     *          the text to normalize; must not be {@code null}
     * @return the normalized text
     */
    public static String processString(String coveredText) {
        coveredText = coveredText.replaceAll("\\n|\\r", "");
        coveredText = coveredText.trim();
        return coveredText;
    }

    /**
     * Extracts the <em>last</em> substring of {@code coveredText} that matches {@code regex}.
     * 
     * @param coveredText
     *          the text to search; must not be {@code null}
     * @param regex
     *          the regular expression to look for
     * @return the last match, or {@code coveredText} unchanged when nothing matches
     */
    public static String stringCleanUp(String coveredText, String regex) {
        Matcher match = Pattern.compile(regex).matcher(coveredText);
        String lastMatch = coveredText;
        while (match.find()) {
            lastMatch = match.group();
        }
        return lastMatch;
    }

    /**
     * Appends {@code coveredText} to the comma-separated value stored under
     * {@link #OtherNamedTagsKey}. When no value is stored yet, {@code coveredText} becomes the value
     * (instead of producing the literal {@code "null,<text>"}). A {@code null} text is ignored.
     * 
     * @param json
     *          the JSON object to update in place
     * @param coveredText
     *          the tag text to append
     */
    // JSONObject.put is an unchecked call because json-simple's JSONObject is not generic.
    @SuppressWarnings("unchecked")
    public static void appendOtherNamedTags(JSONObject json, String coveredText) {
        if (coveredText == null) {
            return;
        }
        String otherNamedTags = (String) json.get(OtherNamedTagsKey);
        if (otherNamedTags == null) {
            json.put(OtherNamedTagsKey, coveredText);
            return;
        }
        StringJoiner joiner = new StringJoiner(",");
        joiner.add(otherNamedTags).add(coveredText);
        json.put(OtherNamedTagsKey, joiner.toString());
    }
}