gate.Utils.java Source code

Java tutorial

Introduction

Here is the source code for gate.Utils.java

Source

/*
 *  Utils.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution annotationSet file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Johann Petrak, 2010-02-05
 *
 *  $Id: Main.java 12006 2009-12-01 17:24:28Z thomas_heitz $
 */

package gate;

import gate.annotation.AnnotationSetImpl;
import gate.annotation.ImmutableAnnotationSetImpl;
import gate.creole.ConditionalSerialController;
import gate.creole.RunningStrategy;
import gate.util.GateRuntimeException;
import gate.util.OffsetComparator;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.Level;

/**
 * Various utility methods to make often-needed tasks more easy and
 * using up less code.  In Java code (or JAPE grammars) you may wish to
 * <code>import static gate.Utils.*</code> to access these methods without
 * having to qualify them with a class name.  In Groovy code, this class can be
 * used as a category to inject each utility method into the class of its first
 * argument, e.g.
 * <pre>
 * Document doc = // ...
 * Annotation ann = // ...
 * use(gate.Utils) {
 *   println "Annotation has ${ann.length()} characters"
 *   println "and covers the string \"${doc.stringFor(ann)}\""
 * }
 * </pre>
 *
 * @author Johann Petrak, Ian Roberts
 */
public class Utils {
    /**
     * Return the length of the document content covered by an Annotation as an
     * int -- if the content is too long for an int, the method will throw
     * a GateRuntimeException. Use getLengthLong(SimpleAnnotation ann) if
     * this situation could occur.
     * @param ann the annotation for which to determine the length
     * @return the length of the document content covered by this annotation.
     */
    public static int length(SimpleAnnotation ann) {
        long len = lengthLong(ann);
        if (len > java.lang.Integer.MAX_VALUE) {
            throw new GateRuntimeException("Length of annotation too big to be returned as an int: " + len);
        } else {
            return (int) len;
        }
    }

    /**
     * Return the length of the document content covered by an Annotation as a
     * long.
     * @param ann the annotation for which to determine the length
     * @return the length of the document content covered by this annotation.
     */
    public static long lengthLong(SimpleAnnotation ann) {
        return ann.getEndNode().getOffset() - ann.getStartNode().getOffset();
    }

    /**
     * Return the length of the document as an
     * int -- if the content is too long for an int, the method will throw a
     * GateRuntimeException. Use getLengthLong(Document doc) if
     * this situation could occur.
     * @param doc the document for which to determine the length
     * @return the length of the document content.
     */
    public static int length(Document doc) {
        long len = doc.getContent().size();
        if (len > java.lang.Integer.MAX_VALUE) {
            throw new GateRuntimeException("Length of document too big to be returned as an int: " + len);
        } else {
            return (int) len;
        }
    }

    /**
     * Return the length of the document as a long.
     * @param doc the document for which to determine the length
     * @return the length of the document content.
     */
    public static long lengthLong(Document doc) {
        return doc.getContent().size();
    }

    /**
     * Return the DocumentContent corresponding to the annotation.
     * <p>
     * Note: the DocumentContent object returned will also contain the
     * original content which can be accessed using the getOriginalContent()
     * method.
     * @param doc the document from which to extract the content
     * @param ann the annotation for which to return the content.
     * @return a DocumentContent representing the content spanned by the annotation.
     */
    public static DocumentContent contentFor(SimpleDocument doc, SimpleAnnotation ann) {
        try {
            return doc.getContent().getContent(ann.getStartNode().getOffset(), ann.getEndNode().getOffset());
        } catch (gate.util.InvalidOffsetException ex) {
            throw new GateRuntimeException(ex.getMessage());
        }
    }

    /**
     * Return the document text as a String corresponding to the annotation.
     * @param doc the document from which to extract the document text
     * @param ann the annotation for which to return the text.
     * @return a String representing the text content spanned by the annotation.
     */
    public static String stringFor(Document doc, SimpleAnnotation ann) {
        try {
            return doc.getContent().getContent(ann.getStartNode().getOffset(), ann.getEndNode().getOffset())
                    .toString();
        } catch (gate.util.InvalidOffsetException ex) {
            throw new GateRuntimeException(ex.getMessage());
        }
    }

    /**
     * Return the cleaned document text as a String corresponding to the annotation.
     * (Delete leading and trailing whitespace; normalize 
     * internal whitespace to single spaces.)
     * @param doc the document from which to extract the document text
     * @param ann the annotation for which to return the text.
     * @return a String representing the text content spanned by the annotation.
     */
    public static String cleanStringFor(Document doc, SimpleAnnotation ann) {
        return cleanString(stringFor(doc, ann));
    }

    /**
     * Returns the document text between the provided offsets.
     * @param doc the document from which to extract the document text
     * @param start the start offset 
     * @param end the end offset
     * @return document text between the provided offsets
     */
    public static String stringFor(Document doc, Long start, Long end) {
        try {
            return doc.getContent().getContent(start, end).toString();
        } catch (gate.util.InvalidOffsetException ex) {
            throw new GateRuntimeException(ex.getMessage());
        }
    }

    /**
     * Return the cleaned document text between the provided offsets.
     * (Delete leading and trailing whitespace; normalize 
     * internal whitespace to single spaces.)
     * @param doc the document from which to extract the document text
     * @param start the start offset 
     * @param end the end offset
     * @return document text between the provided offsets
     */
    public static String cleanStringFor(Document doc, Long start, Long end) {
        return cleanString(stringFor(doc, start, end));
    }

    /**
     * Return the DocumentContent covered by the given annotation set.
     * <p>
     * Note: the DocumentContent object returned will also contain the
     * original content which can be accessed using the getOriginalContent()
     * method.
     * @param doc the document from which to extract the content
     * @param anns the annotation set for which to return the content.
     * @return a DocumentContent representing the content spanned by the
     * annotation set.
     */
    public static DocumentContent contentFor(SimpleDocument doc, AnnotationSet anns) {
        try {
            return doc.getContent().getContent(anns.firstNode().getOffset(), anns.lastNode().getOffset());
        } catch (gate.util.InvalidOffsetException ex) {
            throw new GateRuntimeException(ex.getMessage());
        }
    }

    /**
     * Return the document text as a String covered by the given annotation set.
     * @param doc the document from which to extract the document text
     * @param anns the annotation set for which to return the text.
     * @return a String representing the text content spanned by the annotation
     * set.
     */
    public static String stringFor(Document doc, AnnotationSet anns) {
        try {
            return doc.getContent().getContent(anns.firstNode().getOffset(), anns.lastNode().getOffset())
                    .toString();
        } catch (gate.util.InvalidOffsetException ex) {
            throw new GateRuntimeException(ex.getMessage());
        }
    }

    /**
     * Return the cleaned document text as a String covered by the given annotation set.
     * (Delete leading and trailing whitespace; normalize 
     * internal whitespace to single spaces.)
     * @param doc the document from which to extract the document text
     * @param anns the annotation set for which to return the text.
     * @return a String representing the text content spanned by the annotation
     * set.
     */
    public static String cleanStringFor(Document doc, AnnotationSet anns) {
        return cleanString(stringFor(doc, anns));
    }

    /**
     * Return a cleaned version of the input String. (Delete leading and trailing
     * whitespace; normalize internal whitespace to single spaces; return an
     * empty String if the input contains nothing but whitespace, but null
     * if the input is null.)
     * @return a cleaned version of the input String.
     */
    public static String cleanString(String input) {
        if (input == null) {
            return null;
        }
        // implied else
        return input.replaceAll("\\s+", " ").trim();
    }

    /**
     * Get the start offset of an annotation.
     */
    public static Long start(SimpleAnnotation a) {
        return (a.getStartNode() == null) ? null : a.getStartNode().getOffset();
    }

    /**
     * Get the start offset of an annotation set.
     */
    public static Long start(AnnotationSet as) {
        return (as.firstNode() == null) ? null : as.firstNode().getOffset();
    }

    /**
     * Get the start offset of a document (i.e. 0L).
     */
    public static Long start(SimpleDocument d) {
        return Long.valueOf(0L);
    }

    /**
     * Get the end offset of an annotation.
     */
    public static Long end(SimpleAnnotation a) {
        return (a.getEndNode() == null) ? null : a.getEndNode().getOffset();
    }

    /**
     * Get the end offset of an annotation set.
     */
    public static Long end(AnnotationSet as) {
        return (as.lastNode() == null) ? null : as.lastNode().getOffset();
    }

    /**
     * Get the end offset of a document.
     */
    public static Long end(SimpleDocument d) {
        return d.getContent().size();
    }

    /**
     * Return a the subset of annotations from the given annotation set
     * that start exactly at the given offset.
     *
     * @param annotationSet the set of annotations from which to select
     * @param atOffset the offset where the annoation to be returned should start
     * @return an annotation set containing all the annotations from the original
     * set that start at the given offset
     */
    public static AnnotationSet getAnnotationsAtOffset(AnnotationSet annotationSet, Long atOffset) {
        // this returns all annotations that start at this atOffset OR AFTER!
        AnnotationSet tmp = annotationSet.get(atOffset);
        // so lets filter ...
        AnnotationSet ret = new AnnotationSetImpl(annotationSet.getDocument());
        Iterator<Annotation> it = tmp.iterator();
        while (it.hasNext()) {
            Annotation ann = it.next();
            if (ann.getStartNode().getOffset().equals(atOffset)) {
                ret.add(ann);
            }
        }
        return ret;
    }

    /**
     * Get all the annotations from the source annotation set that lie within
     * the range of the containing annotation.
     * 
     * @param sourceAnnotationSet the annotation set from which to select
     * @param containingAnnotation the annotation whose range must contain the
     * selected annotations
     * @return the AnnotationSet containing all annotations fully contained in
     * the offset range of the containingAnnotation
     */
    public static AnnotationSet getContainedAnnotations(AnnotationSet sourceAnnotationSet,
            Annotation containingAnnotation) {
        return getContainedAnnotations(sourceAnnotationSet, containingAnnotation, "");
    }

    /**
     * Get all the annotations of type targetType
     * from the source annotation set that lie within
     * the range of the containing annotation.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param containingAnnotation the annotation whose range must contain the
     * @param targetType the type the selected annotations must have. If the
     * empty string, no filtering on type is done.
     * @return the AnnotationSet containing all annotations fully contained in
     * the offset range of the containingAnnotation
     */
    public static AnnotationSet getContainedAnnotations(AnnotationSet sourceAnnotationSet,
            Annotation containingAnnotation, String targetType) {
        if (targetType.equals("")) {
            return sourceAnnotationSet.getContained(containingAnnotation.getStartNode().getOffset(),
                    containingAnnotation.getEndNode().getOffset());
        } else {
            return sourceAnnotationSet.getContained(containingAnnotation.getStartNode().getOffset(),
                    containingAnnotation.getEndNode().getOffset()).get(targetType);
        }
    }

    /**
     * Get all the annotations from the source annotation set that lie within
     * the range of the containing annotation set, i.e. within the offset range
     * between the start of the first annotation in the containing set and the
     * end of the last annotation in the annotation set. If the containing
     * annotation set is empty, an empty set is returned.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param containingAnnotationSet the annotation set whose range must contain
     * the selected annotations
     * @return the AnnotationSet containing all annotations fully contained in
     * the offset range of the containingAnnotationSet
     */
    public static AnnotationSet getContainedAnnotations(AnnotationSet sourceAnnotationSet,
            AnnotationSet containingAnnotationSet) {
        return getContainedAnnotations(sourceAnnotationSet, containingAnnotationSet, "");
    }

    /**
     * Get all the annotations from the source annotation set with a type equal to
     * targetType that lie within
     * the range of the containing annotation set, i.e. within the offset range
     * between the start of the first annotation in the containing set and the
     * end of the last annotation in the annotation set. If the containing
     * annotation set is empty, an empty set is returned.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param containingAnnotationSet the annotation set whose range must contain
     * the selected annotations
     * @param targetType the type the selected annotations must have
     * @return the AnnotationSet containing all annotations fully contained in
     * the offset range of the containingAnnotationSet
     */
    public static AnnotationSet getContainedAnnotations(AnnotationSet sourceAnnotationSet,
            AnnotationSet containingAnnotationSet, String targetType) {
        if (containingAnnotationSet.isEmpty() || sourceAnnotationSet.isEmpty()) {
            return Factory.createImmutableAnnotationSet(sourceAnnotationSet.getDocument(), null);
        }
        if (targetType.equals("")) {
            return sourceAnnotationSet.getContained(containingAnnotationSet.firstNode().getOffset(),
                    containingAnnotationSet.lastNode().getOffset());
        } else {
            return sourceAnnotationSet.getContained(containingAnnotationSet.firstNode().getOffset(),
                    containingAnnotationSet.lastNode().getOffset()).get(targetType);
        }
    }

    /**
     * Get all the annotations from the source annotation set that cover
     * the range of the specified annotation.
     * 
     * @param sourceAnnotationSet the annotation set from which to select
     * @param coveredAnnotation the annotation whose range must equal or lie within
     * the selected annotations
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the coveredAnnotation
     */
    public static AnnotationSet getCoveringAnnotations(AnnotationSet sourceAnnotationSet,
            Annotation coveredAnnotation) {
        return getCoveringAnnotations(sourceAnnotationSet, coveredAnnotation, "");
    }

    /**
     * Get all the annotations of type targetType
     * from the source annotation set that cover
     * the range of the specified annotation.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param coveredAnnotation the annotation whose range must be covered
     * @param targetType the type the selected annotations must have. If the
     * empty string, no filtering on type is done.
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the coveredAnnotation
     */
    public static AnnotationSet getCoveringAnnotations(AnnotationSet sourceAnnotationSet,
            Annotation coveredAnnotation, String targetType) {
        return sourceAnnotationSet.getCovering(targetType, coveredAnnotation.getStartNode().getOffset(),
                coveredAnnotation.getEndNode().getOffset());
    }

    /**
     * Get all the annotations from the source annotation set that cover
     * the range of the specified annotation set. If the covered
     * annotation set is empty, an empty set is returned.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param coveredAnnotationSet the annotation set whose range must be covered by
     * the selected annotations
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the containingAnnotationSet
     */
    public static AnnotationSet getCoveringAnnotations(AnnotationSet sourceAnnotationSet,
            AnnotationSet coveredAnnotationSet) {
        return getCoveringAnnotations(sourceAnnotationSet, coveredAnnotationSet, "");
    }

    /**
     * Get all the annotations from the source annotation set with a type equal to
     * targetType that cover
     * the range of the specified annotation set. If the specified
     * annotation set is empty, an empty set is returned.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param coveredAnnotationSet the annotation set whose range must
     * be covered by the selected annotations
     * @param targetType the type the selected annotations must have
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the containingAnnotationSet
     */
    public static AnnotationSet getCoveringAnnotations(AnnotationSet sourceAnnotationSet,
            AnnotationSet coveredAnnotationSet, String targetType) {
        if (coveredAnnotationSet.isEmpty() || sourceAnnotationSet.isEmpty()) {
            return Factory.createImmutableAnnotationSet(sourceAnnotationSet.getDocument(), null);
        }
        return sourceAnnotationSet.getCovering(targetType, coveredAnnotationSet.firstNode().getOffset(),
                coveredAnnotationSet.lastNode().getOffset());
    }

    /**
     * Get all the annotations from the source annotation set that
     * partly or totally overlap
     * the range of the specified annotation.
     * 
     * @param sourceAnnotationSet the annotation set from which to select
     * @param overlappedAnnotation the annotation whose range the selected
     * annotations must overlap
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the coveredAnnotation
     */
    public static AnnotationSet getOverlappingAnnotations(AnnotationSet sourceAnnotationSet,
            Annotation overlappedAnnotation) {
        return getOverlappingAnnotations(sourceAnnotationSet, overlappedAnnotation, "");
    }

    /**
     * Get all the annotations of type targetType
     * from the source annotation set that partly or totally overlap
     * the range of the specified annotation.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param overlappedAnnotation the annotation whose range the selected
     * annotations must overlap
     * @param targetType the type the selected annotations must have. If the
     * empty string, no filtering on type is done.
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the coveredAnnotation
     */
    public static AnnotationSet getOverlappingAnnotations(AnnotationSet sourceAnnotationSet,
            Annotation overlappedAnnotation, String targetType) {

        if ((targetType == null) || targetType.isEmpty()) {
            return sourceAnnotationSet.get(overlappedAnnotation.getStartNode().getOffset(),
                    overlappedAnnotation.getEndNode().getOffset());
        }

        return sourceAnnotationSet.get(targetType, overlappedAnnotation.getStartNode().getOffset(),
                overlappedAnnotation.getEndNode().getOffset());
    }

    /**
     * Get all the annotations from the source annotation set that overlap
     * the range of the specified annotation set. If the overlapped
     * annotation set is empty, an empty set is returned.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param overlappedAnnotationSet the annotation set whose range must
     * be overlapped by the selected annotations
     * @return the AnnotationSet containing all annotations that fully cover
     * the offset range of the containingAnnotationSet
     */
    public static AnnotationSet getOverlappingAnnotations(AnnotationSet sourceAnnotationSet,
            AnnotationSet overlappedAnnotationSet) {
        return getOverlappingAnnotations(sourceAnnotationSet, overlappedAnnotationSet, "");
    }

    /**
     * Get all the annotations from the source annotation set with a type equal to
     * targetType that partly or completely overlap the range of the specified 
     * annotation set. If the specified annotation set is empty, an empty 
     * set is returned.
     *
     * @param sourceAnnotationSet the annotation set from which to select
     * @param overlappedAnnotationSet the annotation set whose range must
     * be overlapped by the selected annotations
     * @param targetType the type the selected annotations must have
     * @return the AnnotationSet containing all annotations that partly or fully
     * overlap the offset range of the containingAnnotationSet
     */
    public static AnnotationSet getOverlappingAnnotations(AnnotationSet sourceAnnotationSet,
            AnnotationSet overlappedAnnotationSet, String targetType) {
        if (overlappedAnnotationSet.isEmpty() || sourceAnnotationSet.isEmpty()) {
            return Factory.createImmutableAnnotationSet(sourceAnnotationSet.getDocument(), null);
        }

        if ((targetType == null) || targetType.isEmpty()) {
            return sourceAnnotationSet.get(overlappedAnnotationSet.firstNode().getOffset(),
                    overlappedAnnotationSet.lastNode().getOffset());
        }

        return sourceAnnotationSet.get(targetType, overlappedAnnotationSet.firstNode().getOffset(),
                overlappedAnnotationSet.lastNode().getOffset());
    }

    /**
     * Return a List containing the annotations in the given annotation set, in
     * document order (i.e. increasing order of start offset).
     *
     * @param as the annotation set
     * @return a list containing the annotations from <code>as</code> in document
     * order.
     */
    public static List<Annotation> inDocumentOrder(AnnotationSet as) {
        List<Annotation> ret = new ArrayList<Annotation>();
        if (as != null) {
            ret.addAll(as);
            Collections.sort(ret, OFFSET_COMPARATOR);
        }
        return ret;
    }

    /**
     * A single instance of {@link OffsetComparator} that can be used by any code
     * that requires one.
     */
    public static final OffsetComparator OFFSET_COMPARATOR = new OffsetComparator();

    /**
     * Create a feature map from an array of values.  The array must have an even
     * number of items, alternating keys and values i.e. [key1, value1, key2,
     * value2, ...].
     *
     * @param values an even number of items, alternating keys and values.
     * @return a feature map containing the given items.
     */
    public static FeatureMap featureMap(Object... values) {
        FeatureMap fm = Factory.newFeatureMap();
        if (values != null) {
            for (int i = 0; i < values.length; i++) {
                fm.put(values[i], values[++i]);
            }
        }
        return fm;
    }

    /**
     * Create a feature map from an existing map (typically one that does not
     * itself implement FeatureMap).
     *
     * @param map the map to convert.
     * @return a new FeatureMap containing the same mappings as the source map.
     */
    public static FeatureMap toFeatureMap(Map<?, ?> map) {
        FeatureMap fm = Factory.newFeatureMap();
        fm.putAll(map);
        return fm;
    }

    /** 
     * This method can be used to check if a ProcessingResource has 
     * a chance to be run in the given controller with the current settings.
     * <p>
     * That means that for a non-conditional controller, the method will return
     * true if the PR is part of the controller. For a conditional controller,
     * the method will return true if it is part of the controller and at least
     * once (if the same PR is contained multiple times) it is not disabled.
     * 
     * @param controller 
     * @param pr
     * @return true or false depending on the conditions explained above.
     */
    public static boolean isEnabled(Controller controller, ProcessingResource pr) {
        Collection<ProcessingResource> prs = controller.getPRs();
        if (!prs.contains(pr)) {
            return false;
        }
        if (controller instanceof ConditionalSerialController) {
            Collection<RunningStrategy> rss = ((ConditionalSerialController) controller).getRunningStrategies();
            for (RunningStrategy rs : rss) {
                // if we find at least one occurrence of the PR that is not disabled
                // return true
                if (rs.getPR().equals(pr) && rs.getRunMode() != RunningStrategy.RUN_NEVER) {
                    return true;
                }
            }
            // if we get here, no occurrence of the PR has found or none that
            // is not disabled, so return false
            return false;
        }
        return true;
    }

    /**
     * Return the running strategy of the PR in the controller, if the controller
     * is a conditional controller. If the controller is not a conditional 
     * controller, null is returned. If the controller is a conditional controller
     * and the PR is contained multiple times, the running strategy for the 
     * first occurrence the is found is returned.
     * 
     * @param controller
     * @param pr
     * @return A RunningStrategy object or null
     */
    public static RunningStrategy getRunningStrategy(Controller controller, ProcessingResource pr) {
        if (controller instanceof ConditionalSerialController) {
            Collection<RunningStrategy> rss = ((ConditionalSerialController) controller).getRunningStrategies();
            for (RunningStrategy rs : rss) {
                if (rs.getPR() == pr) {
                    return rs;
                }
            }
        }
        return null;
    }

    /**
     * Issue a message to the log but only if the same message has not
     * been logged already in the same GATE session.
     * This is intended for explanations or warnings that should not be 
     * repeated every time the same situation occurs.
     * 
     * @param logger - the logger instance to use
     * @param level  - the severity level for the message
     * @param message - the message itself
     */
    public static void logOnce(Logger logger, Level level, String message) {
        if (!alreadyLoggedMessages.contains(message)) {
            logger.log(level, message);
            alreadyLoggedMessages.add(message);
        }
    }

    /**
     * Check if a message has already been logged or shown. This does not log
     * or show anything but only stores the message as one that has been shown
     * already if necessary and returns if the message has been shown or not.
     *
     * @param message - the message that should only be logged or shown once
     * @return - true if the message has already been logged or checked with
     * this method.
     *
     */
    public static boolean isLoggedOnce(String message) {
        boolean isThere = alreadyLoggedMessages.contains(message);
        if (!isThere) {
            alreadyLoggedMessages.add(message);
        }
        return isThere;
    }

    private static final Set<String> alreadyLoggedMessages = Collections.synchronizedSet(new HashSet<String>());

}