Java tutorial
/**
 * AnnotationTools.java
 *
 * Copyright (c) 2006, JULIE Lab.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 *
 * Author: tomanek
 *
 * Current version: 1.3
 * Since version:   1.0
 *
 * Creation date: Feb 19, 2006
 *
 * Tool for creating new UIMA annotation Objects and other annotation related things
 *
 * //TODO: we may move some functions from JulesTools here...
 **/

package de.julielab.jcore.utility;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang3.Range;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
// import de.julielab.jcore.types.Annotation;
import org.apache.uima.jcas.tcas.Annotation;

/**
 * Static helpers for creating UIMA annotations by class name and for looking up annotations by their offsets
 * relative to given offsets or to a given "focus" annotation.
 */
public class JCoReAnnotationTools {

    /**
     * Creates an annotation object of the type specified by <tt>fullAnnotationClassName</tt>. This is done by means
     * of dynamic class loading and reflection: the class is loaded by name and its constructor taking a single
     * {@link JCas} argument is invoked. The annotation is only instantiated; this method does not add it to any
     * index.
     *
     * @param aJCas
     *            the jcas to which to link this annotation object
     * @param fullAnnotationClassName
     *            the full class name of the new annotation object
     * @return the newly created annotation
     * @throws ClassNotFoundException
     *             if no class with the given name can be loaded
     * @throws NoSuchMethodException
     *             if the class has no public constructor taking a single {@link JCas}
     * @throws InvocationTargetException
     *             if the constructor itself throws
     * @throws ClassCastException
     *             if the named class is not an {@link Annotation}
     */
    public static Annotation getAnnotationByClassName(JCas aJCas, String fullAnnotationClassName)
            throws ClassNotFoundException, SecurityException, NoSuchMethodException, IllegalArgumentException,
            InstantiationException, IllegalAccessException, InvocationTargetException {
        Class<?> annotationClass = Class.forName(fullAnnotationClassName);
        Constructor<?> constructor = annotationClass.getConstructor(JCas.class);
        return (Annotation) constructor.newInstance(aJCas);
    }

    /**
     * Returns an annotation of the type <tt>fullAnnotationClassName</tt> which has exactly the specified offsets.
     *
     * @param aJCas
     *            the cas to search in
     * @param fullAnnotationClassName
     *            the full class name of the specific annotation type
     * @param startOffset
     *            the begin offset the annotation must have
     * @param endOffset
     *            the end offset the annotation must have
     * @return the first annotation object of the given type at exactly the given offsets. If no annotation is found
     *         there, <tt>null</tt> is returned
     */
    public static Annotation getAnnotationAtOffset(JCas aJCas, String fullAnnotationClassName, int startOffset,
            int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
            NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
        Iterator<?> annoIter = typedAnnotationIterator(aJCas, fullAnnotationClassName);
        while (annoIter.hasNext()) {
            Annotation currAnno = (Annotation) annoIter.next();
            if (currAnno.getBegin() == startOffset && currAnno.getEnd() == endOffset) {
                return currAnno;
            }
        }
        return null;
    }

    /**
     * Returns the first annotation of type <tt>cls</tt> in the annotation index that has exactly the same begin and
     * end offsets as <tt>focusAnnotation</tt>.
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation whose offsets have to be matched
     * @param cls
     *            the type of annotation to look for
     * @return the first annotation of type <tt>cls</tt> with the offsets of <tt>focusAnnotation</tt> or
     *         <tt>null</tt> if there is none
     * @throws IllegalArgumentException
     *             if the index cursor is invalid after moving it to <tt>focusAnnotation</tt>
     */
    public static <T extends Annotation> T getAnnotationAtMatchingOffsets(JCas aJCas, Annotation focusAnnotation,
            Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
        moveToFocus(cursor, focusAnnotation);
        rewindToFirstWithBeginAtLeast(cursor, focusAnnotation.getBegin());

        // Now that we have our starting point, we go to the right until we find an annotation of the correct type
        // and the same offsets as focusAnnotation.
        Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            if (currentAnnotation.getBegin() > focusAnnotation.getEnd()) {
                break;
            }
            if (cls.isInstance(currentAnnotation)) {
                Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                        currentAnnotation.getEnd());
                if (focusRange.equals(currentRange)) {
                    return cls.cast(currentAnnotation);
                }
            }
            cursor.moveToNext();
        }
        return null;
    }

    /**
     * Returns an annotation of the type <tt>fullAnnotationClassName</tt> which overlaps the given offsets in any
     * way: it covers the span completely, lies completely within the span, or crosses the span's start or end
     * offset.
     *
     * @param aJCas
     *            The cas to search in
     * @param fullAnnotationClassName
     *            The full class name of the specific annotation type
     * @param startOffset
     *            begin of the span to check for overlaps
     * @param endOffset
     *            end of the span to check for overlaps
     * @return The first annotation object of the given type that overlaps the given span. If no such annotation is
     *         found, <tt>null</tt> is returned
     */
    public static Annotation getOverlappingAnnotation(JCas aJCas, String fullAnnotationClassName, int startOffset,
            int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
            NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
        Iterator<?> annoIter = typedAnnotationIterator(aJCas, fullAnnotationClassName);
        while (annoIter.hasNext()) {
            Annotation currAnno = (Annotation) annoIter.next();
            int begin = currAnno.getBegin();
            int end = currAnno.getEnd();
            boolean coversSpan = begin <= startOffset && end >= endOffset;
            boolean withinSpan = begin >= startOffset && end <= endOffset;
            if (coversSpan || withinSpan || crossesBoundary(begin, end, startOffset, endOffset)) {
                return currAnno;
            }
        }
        return null;
    }

    /**
     * Returns an annotation of the type <tt>fullAnnotationClassName</tt> which crosses a boundary of the given span,
     * i.e. which begins before and ends after <tt>startOffset</tt> or begins before and ends after
     * <tt>endOffset</tt>.
     *
     * @param aJCas
     *            The cas to search in
     * @param fullAnnotationClassName
     *            The full class name of the specific annotation type
     * @param startOffset
     *            begin of the span
     * @param endOffset
     *            end of the span
     * @return The first annotation object of the given type that crosses the start or the end offset. If no such
     *         annotation is found, <tt>null</tt> is returned
     */
    public static Annotation getPartiallyOverlappingAnnotation(JCas aJCas, String fullAnnotationClassName,
            int startOffset, int endOffset) throws SecurityException, IllegalArgumentException,
            ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException,
            InvocationTargetException {
        Iterator<?> annoIter = typedAnnotationIterator(aJCas, fullAnnotationClassName);
        while (annoIter.hasNext()) {
            Annotation currAnno = (Annotation) annoIter.next();
            if (crossesBoundary(currAnno.getBegin(), currAnno.getEnd(), startOffset, endOffset)) {
                return currAnno;
            }
        }
        return null;
    }

    /**
     * Returns the leftmost annotation of type <tt>cls</tt> that overlaps <tt>focusAnnotation</tt>. That is, if
     * multiple annotations of type <tt>cls</tt> overlap with <tt>focusAnnotation</tt>, the one with the lowest begin
     * offset will be chosen.
     * <p>
     * The two annotations may overlap in any way (partial, nested, inclusion, exact match). This algorithm has
     * <tt>O(n)</tt> runtime with <tt>n</tt> being the number of annotations in the annotation index.
     * </p>
     * <p>
     * Note: the final overlap test uses inclusive ranges, so a candidate that begins exactly at the end offset of
     * <tt>focusAnnotation</tt> is accepted, whereas a candidate that ends exactly at the begin offset of
     * <tt>focusAnnotation</tt> is skipped.
     * </p>
     * <p>
     * TODO: A start offset parameter could be introduced from where to start looking. This way, when iterating over
     * a number of different focusAnnotations in ascending order, one would have only to check from focusAnnotation
     * to focusAnnotation and not always from the very beginning of the annotation index. Same thing for
     * getIncludingAnnotation().
     * </p>
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation to find an overlapping annotation for
     * @param cls
     *            the type of annotation to look for
     * @return the leftmost annotation of type <tt>cls</tt> that overlaps <tt>focusAnnotation</tt> or <tt>null</tt>
     *         if there is none (including the case of an empty annotation index)
     */
    public static <T extends Annotation> T getPartiallyOverlappingAnnotation(JCas aJCas, Annotation focusAnnotation,
            Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();

        // Annotations are sorted by begin offset and may be arbitrarily long. Thus we just have to start from the
        // beginning.
        cursor.moveToFirst();

        Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            // The first annotation of the correct type that ends behind the focus begin decides the outcome: all
            // following annotations begin even later.
            if (cls.isInstance(currentAnnotation) && currentAnnotation.getEnd() > focusAnnotation.getBegin()) {
                Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                        currentAnnotation.getEnd());
                return currentRange.isOverlappedBy(focusRange) ? cls.cast(currentAnnotation) : null;
            }
            cursor.moveToNext();
        }
        // Empty index or no candidate at all.
        return null;
    }

    /**
     * Returns, in ascending order, all annotations of type <tt>cls</tt> that are completely included - perhaps with
     * having the same begin and/or end as the <tt>focusAnnotation</tt> - in <tt>focusAnnotation</tt>.
     * <p>
     * An indexed <tt>focusAnnotation</tt> that is itself an instance of <tt>cls</tt> satisfies the inclusion test as
     * well.
     * </p>
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation whose span must include the returned annotations
     * @param cls
     *            the type of annotation to look for
     * @return the included annotations in index order; an empty list if there are none
     * @throws IllegalArgumentException
     *             if the index cursor is invalid after moving it to <tt>focusAnnotation</tt>
     */
    public static <T extends Annotation> List<T> getIncludedAnnotations(JCas aJCas, Annotation focusAnnotation,
            Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
        moveToFocus(cursor, focusAnnotation);
        rewindToFirstWithBeginAtLeast(cursor, focusAnnotation.getBegin());

        // Now that we have our starting point, we go to the right as long as there is a possibility to still find
        // annotations included in the focusAnnotation, i.e. as long the current begin offset is still lower (or
        // equal for the weird case of zero-length-annotations) than the end offset of the focusAnnotation.
        Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
        List<T> includedAnnotations = new ArrayList<>();
        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            if (currentAnnotation.getBegin() > focusAnnotation.getEnd()) {
                break;
            }
            if (cls.isInstance(currentAnnotation)) {
                Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                        currentAnnotation.getEnd());
                if (focusRange.containsRange(currentRange)) {
                    includedAnnotations.add(cls.cast(currentAnnotation));
                }
            }
            cursor.moveToNext();
        }
        return includedAnnotations;
    }

    /**
     * Returns the leftmost annotation of type <tt>cls</tt> that completely includes <tt>focusAnnotation</tt>. That
     * is, if multiple annotations of type <tt>cls</tt> include <tt>focusAnnotation</tt>, the one with the lowest
     * begin offset will be chosen.
     * <p>
     * This algorithm has <tt>O(n)</tt> runtime with <tt>n</tt> being the number of annotations in the annotation
     * index.
     * </p>
     * <p>
     * TODO: A start offset parameter could be introduced from where to start looking. This way, when iterating over
     * a number of different focusAnnotations in ascending order, one would have only to check from focusAnnotation
     * to focusAnnotation and not always from the very beginning of the annotation index. Same thing for
     * getPartiallyOverlappingAnnotation().
     * </p>
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation that must be included
     * @param cls
     *            the type of annotation to look for
     * @return the leftmost annotation of type <tt>cls</tt> that completely includes <tt>focusAnnotation</tt> or
     *         <tt>null</tt> if there is none (including the case of an empty annotation index)
     */
    public static <T extends Annotation> T getIncludingAnnotation(JCas aJCas, Annotation focusAnnotation,
            Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();

        // Annotations are sorted by begin offset and may be arbitrarily long. Thus we just have to start from the
        // beginning.
        cursor.moveToFirst();

        Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            // The first annotation of the correct type reaching at least to the focus end decides the outcome: if
            // it does not begin early enough, no later annotation will.
            if (cls.isInstance(currentAnnotation) && currentAnnotation.getEnd() >= focusAnnotation.getEnd()) {
                Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                        currentAnnotation.getEnd());
                return currentRange.containsRange(focusRange) ? cls.cast(currentAnnotation) : null;
            }
            cursor.moveToNext();
        }
        // Empty index or no candidate at all.
        return null;
    }

    /**
     * Returns the nearest annotation of class <tt>cls</tt> to <tt>focusAnnotation</tt>, i.e. the one (or just one,
     * if multiple exist) with the highest start-offset that completely overlaps <tt>focusAnnotation</tt>.
     * <p>
     * This method has nice performance properties when it is known that the annotation looked for is near, e.g.
     * finding the nearest token or sentence.
     * </p>
     * <p>
     * An indexed <tt>focusAnnotation</tt> that is itself an instance of <tt>cls</tt> satisfies the inclusion test as
     * well.
     * </p>
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation that must be included
     * @param cls
     *            the type of annotation to look for
     * @return the nearest annotation of type <tt>cls</tt> that completely includes <tt>focusAnnotation</tt>, found
     *         by scanning leftwards from the last annotation beginning at or before the begin of
     *         <tt>focusAnnotation</tt>; <tt>null</tt> if there is none
     * @throws IllegalArgumentException
     *             if the freshly created index cursor is invalid, i.e. before it has been moved to
     *             <tt>focusAnnotation</tt>
     */
    public static <T extends Annotation> T getNearestIncludingAnnotation(JCas aJCas, Annotation focusAnnotation,
            Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();

        // This check happens before the cursor is positioned, so it only fires when the fresh cursor is invalid.
        if (!cursor.isValid()) {
            throw focusNotFound(focusAnnotation);
        }

        // The annotations are sorted by begin offset. So go to the first annotation with a larger begin offset
        // compared to the focusAnnotation and step back one position. If we run off the end of the index, start
        // at the last annotation so that no candidate sorted after focusAnnotation is skipped.
        cursor.moveTo(focusAnnotation);
        advanceToLastWithBeginAtMost(cursor, focusAnnotation.getBegin());

        // Now that we have our starting point, we go to the left until we find the first annotation of correct
        // type completely overlapping the focus annotation.
        Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            if (cls.isInstance(currentAnnotation)) {
                Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                        currentAnnotation.getEnd());
                if (currentRange.containsRange(focusRange)) {
                    return cls.cast(currentAnnotation);
                }
            }
            cursor.moveToPrevious();
        }
        return null;
    }

    /**
     * Returns the nearest annotations of class <tt>cls</tt> to <tt>focusAnnotation</tt>, i.e. all annotations
     * overlapping <tt>focusAnnotation</tt> where between the leftest returned annotation and the next returned
     * annotation (that is, returned offsets 0 and 1, if there are multiple returned annotations) there is no other
     * annotation of type <tt>cls</tt>.
     * <p>
     * This method has nice performance properties when it is known that the annotation looked for is near, e.g.
     * finding overlapping tokens.
     * </p>
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation to find overlapping annotations for
     * @param cls
     *            the type of annotation to look for
     * @return the overlapping annotations of type <tt>cls</tt> ordered by ascending index position; an empty list
     *         if there are none
     * @throws IllegalArgumentException
     *             if the index cursor is invalid after moving it to <tt>focusAnnotation</tt>
     */
    public static <T extends Annotation> List<T> getNearestOverlappingAnnotations(JCas aJCas,
            Annotation focusAnnotation, Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
        moveToFocus(cursor, focusAnnotation);

        // The annotations are sorted by begin offset. So go to the first annotation with a larger begin offset
        // compared to the focusAnnotation's end offset since then there won't be any more overlapping annotations
        // to the right.
        advanceToLastWithBeginAtMost(cursor, focusAnnotation.getEnd());

        List<T> overlappingAnnotations = new ArrayList<>();
        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            if (cls.isInstance(currentAnnotation) && overlaps(currentAnnotation, focusAnnotation)) {
                overlappingAnnotations.add(cls.cast(currentAnnotation));
                // As soon as we have an overlapping annotation of the correct type that begins strictly before
                // the begin offset of the focusAnnotation, we are finished.
                if (currentAnnotation.getBegin() < focusAnnotation.getBegin()) {
                    break;
                }
            }
            cursor.moveToPrevious();
        }
        // We collected from right to left; order by ascending begin offsets.
        Collections.reverse(overlappingAnnotations);
        return overlappingAnnotations;
    }

    /**
     * Returns the last annotation of type <tt>cls</tt> overlapping <tt>focusAnnotation</tt>, i.e. the first
     * overlapping annotation found when scanning the annotation index leftwards, starting at the last annotation
     * that begins at or before the end of <tt>focusAnnotation</tt>.
     * <p>
     * This method is very similar to {@link #getNearestOverlappingAnnotations(JCas, Annotation, Class)}. Actually,
     * the last result element of {@link #getNearestOverlappingAnnotations(JCas, Annotation, Class)} equals the
     * returned annotation from this method.
     * </p>
     *
     * @param aJCas
     *            the cas to search in
     * @param focusAnnotation
     *            the annotation to find an overlapping annotation for
     * @param cls
     *            the type of annotation to look for
     * @return the last overlapping annotation of type <tt>cls</tt> or <tt>null</tt> if there is none
     * @throws IllegalArgumentException
     *             if the index cursor is invalid after moving it to <tt>focusAnnotation</tt>
     */
    public static <T extends Annotation> T getLastOverlappingAnnotation(JCas aJCas, Annotation focusAnnotation,
            Class<T> cls) {
        FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
        moveToFocus(cursor, focusAnnotation);

        // The annotations are sorted by begin offset. So go to the first annotation with a larger begin offset
        // compared to the focusAnnotation's end offset since then there won't be any more overlapping annotations
        // to the right.
        advanceToLastWithBeginAtMost(cursor, focusAnnotation.getEnd());

        while (cursor.isValid()) {
            Annotation currentAnnotation = cursor.get();
            if (cls.isInstance(currentAnnotation) && overlaps(currentAnnotation, focusAnnotation)) {
                return cls.cast(currentAnnotation);
            }
            cursor.moveToPrevious();
        }
        return null;
    }

    /**
     * Creates an iterator over the annotation index of the type named by <tt>fullAnnotationClassName</tt>. The type
     * index ID is obtained from a throw-away instance created via
     * {@link #getAnnotationByClassName(JCas, String)}.
     */
    private static Iterator<?> typedAnnotationIterator(JCas aJCas, String fullAnnotationClassName)
            throws SecurityException, IllegalArgumentException, ClassNotFoundException, NoSuchMethodException,
            InstantiationException, IllegalAccessException, InvocationTargetException {
        Annotation anno = getAnnotationByClassName(aJCas, fullAnnotationClassName);
        JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
        return indexes.getAnnotationIndex(anno.getTypeIndexID()).iterator();
    }

    /** Tells whether the span [begin, end] reaches strictly across <tt>endOffset</tt> or <tt>startOffset</tt>. */
    private static boolean crossesBoundary(int begin, int end, int startOffset, int endOffset) {
        boolean crossesEnd = begin < endOffset && end > endOffset;
        boolean crossesStart = begin < startOffset && end > startOffset;
        return crossesEnd || crossesStart;
    }

    /** Tells whether both annotations share at least one character, treating end offsets as exclusive. */
    private static boolean overlaps(Annotation candidate, Annotation focusAnnotation) {
        return candidate.getBegin() < focusAnnotation.getEnd() && candidate.getEnd() > focusAnnotation.getBegin();
    }

    /** Builds the exception thrown when the cursor cannot be positioned for <tt>focusAnnotation</tt>. */
    private static IllegalArgumentException focusNotFound(Annotation focusAnnotation) {
        return new IllegalArgumentException(
                "Given FocusAnnotation was not found in the JCas' annotation index: " + focusAnnotation);
    }

    /** Moves the cursor to <tt>focusAnnotation</tt> and fails if the cursor is invalid afterwards. */
    private static void moveToFocus(FSIterator<Annotation> cursor, Annotation focusAnnotation) {
        cursor.moveTo(focusAnnotation);
        if (!cursor.isValid()) {
            throw focusNotFound(focusAnnotation);
        }
    }

    /**
     * Moves the cursor leftwards past all annotations beginning at or after <tt>begin</tt> and then one step back
     * to the right, or to the first index element if the beginning of the index was passed.
     */
    private static void rewindToFirstWithBeginAtLeast(FSIterator<Annotation> cursor, int begin) {
        while (cursor.isValid() && cursor.get().getBegin() >= begin) {
            cursor.moveToPrevious();
        }
        if (!cursor.isValid()) {
            cursor.moveToFirst();
        } else {
            cursor.moveToNext();
        }
    }

    /**
     * Moves the cursor rightwards past all annotations beginning at or before <tt>offset</tt> and then one step
     * back to the left, or to the last index element if the cursor is (or becomes) invalid.
     */
    private static void advanceToLastWithBeginAtMost(FSIterator<Annotation> cursor, int offset) {
        while (cursor.isValid() && cursor.get().getBegin() <= offset) {
            cursor.moveToNext();
        }
        if (!cursor.isValid()) {
            cursor.moveToLast();
        } else {
            cursor.moveToPrevious();
        }
    }
}