gate.annotation.AnnotationSetImpl.java Source code

Introduction

Here is the source code for gate.annotation.AnnotationSetImpl.java
Source

/*
 *  AnnotationSetImpl.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 7/Feb/2000
 *
 *  Developer notes:
 *  ---
 *
 *  the addToIndex... and indexBy... methods could be refactored as I'm
 *  sure they can be made simpler
 *
 *  every set to which annotation will be added has to have positional
 *  indexing, so that we can find or create the nodes on the new annotations
 *
 *  note that annotations added anywhere other than sets that are
 *  stored on the document will not get stored anywhere...
 *
 *  nodes aren't doing anything useful now. needs some interface that allows
 *  their creation, defaulting to no coterminous duplicates, but allowing such
 *  if required
 *
 *  $Id$
 */
package gate.annotation;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.FeatureMap;
import gate.Gate;
import gate.GateConstants;
import gate.Node;
import gate.corpora.DocumentImpl;
import gate.event.AnnotationSetEvent;
import gate.event.AnnotationSetListener;
import gate.event.GateEvent;
import gate.event.GateListener;
import gate.relations.RelationSet;
import gate.util.InvalidOffsetException;
import gate.util.RBTreeMap;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.lang.StringUtils;

/**
 * Implementation of AnnotationSet. Has a number of indices, all bar one of
 * which are null by default and are only constructed when asked for. Has lots
 * of get methods with various selection criteria; these return views into the
 * set, which are nonetheless valid sets in their own right (but will not
 * necessarily be fully indexed). Has a name, which is null by default; clients
 * of Document can request named AnnotationSets if they so desire. Has a
 * reference to the Document it is attached to. Contrary to Collections
 * convention, there is no no-arg constructor, as this would leave the set in an
 * inconsistent state.
 * <P>
 * There are four indices: annotation by id, annotations by type, annotations by
 * start node and nodes by offset. The last two jointly provide positional
 * indexing; construction of these is triggered by indexByStart(), or by calling
 * a get method that selects on offset. The type index is triggered by
 * indexByType(), or calling a get method that selects on type. The id index is
 * always present.
 */
public class AnnotationSetImpl extends AbstractSet<Annotation> implements AnnotationSet {
    /** Freeze the serialization UID. */
    static final long serialVersionUID = 1479426765310434166L;
    /** The name of this set; null (the default) for an unnamed set. */
    String name = null;
    /** The document this set belongs to */
    DocumentImpl doc;
    /** Maps annotation ids (Integers) to Annotations. This is the one index that always exists. */
    transient protected HashMap<Integer, Annotation> annotsById;
    /** Maps offsets (Longs) to nodes; null until a positional index is built. */
    transient RBTreeMap<Long, Node> nodesByOffset = null;
    /**
     * This field is used temporarily during serialisation to store all the
     * annotations that need to be saved. At all other times, this will be null.
     * It is deliberately not transient, unlike the index fields.
     */
    private Annotation[] annotations;
    /** Maps annotation types (Strings) to AnnotationSets; null until a type index is built. */
    transient Map<String, AnnotationSet> annotsByType = null;
    /**
     * Maps node ids (Integers) to either a single Annotation or a Collection of
     * Annotations that start from that node. Readers must check which of the
     * two shapes a value has before using it.
     */
    transient Map<Integer, Object> annotsByStartNode;
    /** Registered annotation set listeners (not serialised). */
    protected transient Vector<AnnotationSetListener> annotationSetListeners;
    /** Registered generic GATE listeners (not serialised). */
    private transient Vector<GateListener> gateListeners;

    /**
     * A caching value that greatly improves the performance of get
     * methods that have a defined beginning and end. By tracking the
     * maximum length that an annotation can be, we know the maximum
     * amount of nodes outside of a specified range that must be checked
     * to see if an annotation starting at one of those nodes crosses into
     * the range. This mechanism is not perfect because we do not check if
     * we have to decrease it if an annotation is removed from the set.
     * However, usually annotations are removed because they are about to
     * be replaced with another one that is >= to the length of the one
     * being replaced, so this isn't a big deal. At worst, it means that
     * the get methods simply check a few more start positions than they
     * need to.
     */
    protected transient Long longestAnnot = 0l;

    /** The relation set held by this annotation set; initially null. */
    protected RelationSet relations = null;

    /**
     * Shared empty AnnotationSet to be returned instead of null. It is built
     * with a null document and a null annotation collection.
     */
    public final static AnnotationSet emptyAnnotationSet;

    static {
        emptyAnnotationSet = new ImmutableAnnotationSetImpl(null, null);
    }

    /**
     * Creates an empty, unnamed annotation set attached to the given document.
     * Only the id index is created here; the other indices stay null until
     * they are requested.
     *
     * @param doc the owning document; it is cast to {@link DocumentImpl}
     */
    public AnnotationSetImpl(Document doc) {
        this.doc = (DocumentImpl) doc;
        this.annotsById = new HashMap<Integer, Annotation>();
    } // construction from document

    /**
     * Construction from Document and name. Delegates to the single-argument
     * constructor and then records the set name.
     *
     * @param doc the owning document
     * @param name the name of this set; may be null for an unnamed set
     */
    public AnnotationSetImpl(Document doc, String name) {
        this(doc);
        this.name = name;
    } // construction from document and name

    /**
     * Construction from an existing AnnotationSet. The new set takes the
     * document and name of the source set.
     * <p>
     * When the source is an {@code AnnotationSetImpl}, its id index and any
     * indices it has already built are copied directly, together with the
     * cached {@link #longestAnnot} value that the offset-based getters rely on.
     * For any other implementation the annotations are added one by one.
     *
     * @param c the annotation set to copy
     * @throws ClassCastException if the document of {@code c} is not a
     *           {@link DocumentImpl}
     */
    @SuppressWarnings("unchecked")
    public AnnotationSetImpl(AnnotationSet c) throws ClassCastException {
        this(c.getDocument(), c.getName());
        // the original annotationset is of the same implementation
        if (c instanceof AnnotationSetImpl) {
            AnnotationSetImpl theC = (AnnotationSetImpl) c;
            annotsById.putAll(theC.annotsById);
            // NOTE(review): the index maps below are copied shallowly, so the
            // multi-annotation collections in annotsByStartNode and the per-type
            // sets in annotsByType are shared with the source set - confirm
            // that this aliasing is intended.
            if (theC.annotsByStartNode != null) {
                annotsByStartNode = new HashMap<Integer, Object>(Gate.HASH_STH_SIZE);
                annotsByStartNode.putAll(theC.annotsByStartNode);
            }
            if (theC.annotsByType != null) {
                annotsByType = new HashMap<String, AnnotationSet>(Gate.HASH_STH_SIZE);
                annotsByType.putAll(theC.annotsByType);
            }
            if (theC.nodesByOffset != null) {
                nodesByOffset = (RBTreeMap<Long, Node>) theC.nodesByOffset.clone();
            }
            // The positional index was copied rather than rebuilt, so the cached
            // maximum annotation length must be carried over as well. Leaving it
            // at 0 would make the range getters skip annotations that start
            // before the requested range but overlap it.
            longestAnnot = theC.longestAnnot;
        }
        // the implementation is not the default one
        // let's add the annotations one by one
        else {
            Iterator<Annotation> iterannots = c.iterator();
            while (iterannots.hasNext()) {
                add(iterannots.next());
            }
        }
    }

    /**
     * Removes every annotation from this set and drops all indices.
     * <p>
     * The inherited iterator-based clear runs first so that each removal goes
     * through this set's iterator and fires its removal event; simply
     * discarding the indices would empty the set silently.
     */
    @Override
    public void clear() {
        super.clear();

        // start again from a pristine state: no lazy indices, fresh id index
        longestAnnot = 0L;
        annotsByType = null;
        annotsByStartNode = null;
        nodesByOffset = null;
        annotsById = new HashMap<Integer, Annotation>();
    }

    /**
     * Iterator handed out by {@link #iterator()}. It walks the values of the id
     * index and, on removal, keeps the other indices in step and notifies
     * listeners.
     */
    class AnnotationSetIterator implements Iterator<Annotation> {
        /** Iterator over the values of the id index. */
        private Iterator<Annotation> delegate;
        /** The annotation most recently returned by {@link #next()}. */
        protected Annotation lastNext = null;

        AnnotationSetIterator() {
            delegate = annotsById.values().iterator();
        }

        @Override
        public boolean hasNext() {
            return delegate.hasNext();
        }

        @Override
        public Annotation next() {
            lastNext = delegate.next();
            return lastNext;
        }

        /**
         * Removes the last returned annotation. This is the second way of
         * removing annotations from a set, apart from calling remove() on the
         * set itself.
         */
        @Override
        public void remove() {
            // removing through the delegate takes care of the id index
            delegate.remove();

            if (lastNext != null) {
                removeFromTypeIndex(lastNext);
                removeFromOffsetIndex(lastNext);
                AnnotationSetEvent removal = new AnnotationSetEvent(AnnotationSetImpl.this,
                        AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(), lastNext);
                fireAnnotationRemoved(removal);
            }
        } // remove()
    } // AnnotationSetIterator

    /**
     * Get an iterator for this set. The returned iterator supports remove(),
     * which also updates the type and offset indices and fires a removal event.
     */
    @Override
    public Iterator<Annotation> iterator() {
        return new AnnotationSetIterator();
    }

    /**
     * Remove an element from this set.
     * <p>
     * The annotation is looked up by id. If it is found it is also removed
     * from the type and offset indices, and listeners are told about the
     * removal. Nothing is changed and no event is fired when the annotation
     * is not in the set.
     *
     * @param o the annotation to remove
     * @return true if the annotation was present and has been removed
     * @throws ClassCastException if {@code o} is not an {@link Annotation}
     */
    @Override
    public boolean remove(Object o) throws ClassCastException {
        Annotation a = (Annotation) o;
        if (!removeFromIdIndex(a)) {
            // not a member: nothing was removed, so there is nothing to announce
            return false;
        }
        removeFromTypeIndex(a);
        removeFromOffsetIndex(a);
        // fire the event only for an actual removal
        fireAnnotationRemoved(new AnnotationSetEvent(AnnotationSetImpl.this, AnnotationSetEvent.ANNOTATION_REMOVED,
                getDocument(), a));
        return true;
    } // remove(o)

    /**
     * Remove from the ID index.
     *
     * @param a the annotation whose id should be dropped from the index
     * @return true if an entry was stored under that id, false otherwise
     */
    protected boolean removeFromIdIndex(Annotation a) {
        return annotsById.remove(a.getId()) != null;
    } // removeFromIdIndex(a)

    /**
     * Remove from the type index. Does nothing when the type index has not been
     * built or holds no set for the annotation's type. A per-type set that
     * becomes empty is dropped from the index.
     */
    protected void removeFromTypeIndex(Annotation a) {
        if (annotsByType == null) {
            return;
        }
        AnnotationSet ofSameType = annotsByType.get(a.getType());
        if (ofSameType == null) {
            return;
        }
        ofSameType.remove(a);
        if (ofSameType.isEmpty()) {
            // none left of this type
            annotsByType.remove(a.getType());
        }
    } // removeFromTypeIndex(a)

    /**
     * Remove from the offset indices.
     * <p>
     * Only the start-node index is updated. Nodes are never evicted from
     * nodesByOffset: knowing when a node is no longer needed would require
     * keeping a reference count on annotations, or using weak references to
     * the nodes.
     * <p>
     * Does nothing when the start-node index has not been built or when
     * nothing is indexed at the annotation's start node.
     *
     * @param a the annotation to remove from the start-node index
     */
    protected void removeFromOffsetIndex(Annotation a) {
        if (annotsByStartNode == null)
            return;
        Integer id = a.getStartNode().getId();
        // might be an annotation or a collection of annotations
        Object objectAtNode = annotsByStartNode.get(id);
        if (objectAtNode == null) {
            // nothing is indexed at this node; previously this fell through to
            // a NullPointerException on the collection below
            return;
        }
        if (objectAtNode instanceof Annotation) {
            annotsByStartNode.remove(id); // no annotations start here any more
            return;
        }
        // otherwise it is a Collection
        @SuppressWarnings("unchecked")
        Collection<Annotation> starterAnnots = (Collection<Annotation>) objectAtNode;
        starterAnnots.remove(a);
        if (starterAnnots.size() == 1) {
            // if there is only one annotation left
            // we discard the collection and put directly the annotation
            annotsByStartNode.put(id, starterAnnots.iterator().next());
        } else if (starterAnnots.isEmpty()) {
            // do not leave an empty collection behind: callers treat a missing
            // entry, not an empty one, as "no annotations start here"
            annotsByStartNode.remove(id);
        }
    } // removeFromOffsetIndex(a)

    /** The size of this set, i.e. the number of entries in the id index. */
    @Override
    public int size() {
        return annotsById.size();
    }

    /**
     * Find an annotation by id.
     *
     * @param id the annotation id to look up
     * @return the annotation stored under that id, or null if there is none
     */
    @Override
    public Annotation get(Integer id) {
        return annotsById.get(id);
    } // get(id)

    /**
     * Get all annotations.
     *
     * @return the shared empty set when this set holds no annotations,
     *         otherwise a new ImmutableAnnotationSet over all of them
     */
    @Override
    public AnnotationSet get() {
        return annotsById.isEmpty()
                ? emptyAnnotationSet
                : new ImmutableAnnotationSetImpl(doc, annotsById.values());
    } // get()

    /**
     * Select annotations by type. The type index is built on first use.
     *
     * @param type the annotation type to select
     * @return an ImmutableAnnotationSet; the shared empty set when no
     *         annotation of that type is indexed
     */
    @Override
    public AnnotationSet get(String type) {
        if (annotsByType == null) {
            indexByType();
        }
        AnnotationSet sameType = annotsByType.get(type);
        // hand out an immutable set rather than the mutable index entry
        return (sameType != null) ? sameType.get() : emptyAnnotationSet;
    } // get(type)

    /**
     * Select annotations by a set of types. Expects a Set of String. The type
     * index is built on first use.
     *
     * @param types the annotation types to select
     * @return an ImmutableAnnotationSet holding the annotations of all the
     *         requested types; the shared empty set when there are none
     */
    @Override
    public AnnotationSet get(Set<String> types) throws ClassCastException {
        if (annotsByType == null) {
            indexByType();
        }
        List<Annotation> collected = new ArrayList<Annotation>();
        for (String wantedType : types) {
            AnnotationSet ofType = annotsByType.get(wantedType);
            if (ofType == null) {
                continue; // nothing indexed under this type
            }
            for (Iterator<Annotation> it = ofType.iterator(); it.hasNext();) {
                collected.add(it.next());
            }
        }
        return collected.isEmpty()
                ? emptyAnnotationSet
                : new ImmutableAnnotationSetImpl(doc, collected);
    } // get(types)

    /**
     * Select annotations by type and features
     *
     * This will return an annotation set containing just those annotations of a
     * particular type (i.e. with a particular name) and which have features with
     * specific names and values. (It will also return annotations that have
     * features besides those specified, but it will not return any annotations
     * that do not have all the specified feature-value pairs.)
     *
     * However, if constraints contains a feature whose value is equal to
     * gate.creole.ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME (which is normally
     * "class"), then GATE will attempt to match that feature using an ontology
     * which it will try to retrieve from a feature on both the annotation and
     * in constraints. If these do not return identical ontologies, or if either
     * the annotation or constraints does not contain an ontology, then matching
     * will fail, and the annotation will not be added. In summary, this method
     * will not work normally for features with the name "class".
     *
     * @param type
     *          The name of the annotations to return.
     * @param constraints
     *          A feature map containing all of the feature value pairs that the
     *          annotation must have in order for them to be returned.
     * @return An annotation set containing only those annotations with the given
     *         name and which have the specified set of feature-value pairs.
     *         Never null: the shared empty set is returned when nothing matches.
     */
    @Override
    public AnnotationSet get(String type, FeatureMap constraints) {
        if (annotsByType == null)
            indexByType();
        AnnotationSet typeSet = get(type);
        // get(type) hands back the empty set rather than null; should an
        // override ever return null, stay consistent with the other getters
        // and return the empty set instead of leaking null to the caller
        if (typeSet == null)
            return emptyAnnotationSet;
        List<Annotation> annotationsToAdd = new ArrayList<Annotation>();
        Iterator<Annotation> iter = typeSet.iterator();
        while (iter.hasNext()) {
            Annotation a = iter.next();
            // the matching itself is delegated to FeatureMap.subsumes()
            if (a.getFeatures().subsumes(constraints))
                annotationsToAdd.add(a);
        } // while
        if (annotationsToAdd.isEmpty())
            return emptyAnnotationSet;
        return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
    } // get(type, constraints)

    /**
     * Select annotations by type and feature names.
     * <p>
     * An annotation is selected when the key set of its features contains every
     * name in {@code featureNames}; feature values are not inspected.
     *
     * @param type the annotation type to select, or null to consider the
     *          annotations of every type
     * @param featureNames the feature names each selected annotation must have
     * @return an ImmutableAnnotationSet of the matching annotations. Never
     *         null: the shared empty set is returned when nothing matches.
     */
    @Override
    public AnnotationSet get(String type, Set<? extends Object> featureNames) {
        if (annotsByType == null)
            indexByType();
        Iterator<Annotation> iter;
        if (type != null) {
            // if a type is provided, restrict the search to annotations of this type
            AnnotationSet typeSet = get(type);
            // get(type) hands back the empty set rather than null; should an
            // override ever return null, return the empty set like every other
            // getter instead of leaking null to the caller
            if (typeSet == null)
                return emptyAnnotationSet;
            iter = typeSet.iterator();
        } else {
            iter = annotsById.values().iterator();
        }
        List<Annotation> annotationsToAdd = new ArrayList<Annotation>();
        while (iter.hasNext()) {
            Annotation a = iter.next();
            // only the presence of the feature names is checked, not their values
            if (a.getFeatures().keySet().containsAll(featureNames))
                annotationsToAdd.add(a);
        } // while
        if (annotationsToAdd.isEmpty())
            return emptyAnnotationSet;
        return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
    } // get(type, featureNames)

    /**
     * Select annotations by offset. This returns the set of annotations that
     * start at the first node which is at or after the given offset and has at
     * least one annotation starting from it. If a positional index doesn't
     * exist it is created. If there is no such node then it will return an
     * empty annotation set.
     *
     * @param offset the offset from which to start looking
     * @return an ImmutableAnnotationSet, or the shared empty set
     */
    @Override
    public AnnotationSet get(Long offset) {
        if (annotsByStartNode == null)
            indexByStartOffset();
        // find the next node at or after offset, skipping all the nodes that
        // have no starting annotations
        Node nextNode = nodesByOffset.getNextOf(offset);
        while (nextNode != null) {
            Collection<Annotation> annotationsToAdd = getAnnotsByStartNode(nextNode.getId());
            if (annotationsToAdd != null)
                return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
            // Long.valueOf replaces the deprecated Long(long) constructor
            nextNode = nodesByOffset.getNextOf(Long.valueOf(nextNode.getOffset().longValue() + 1));
        }
        // no nodes with starting annotations at or beyond this offset
        return emptyAnnotationSet;
    }

    /**
     * Select annotations by offset. This returns the set of annotations that
     * start exactly at the given offset. If a positional index doesn't exist it
     * is created. If there is no node at the given offset then the shared empty
     * annotation set is returned.
     *
     * @param offset The starting offset for which to return annotations
     * @return an ImmutableAnnotationSetImpl containing all annotations starting
     *         at the given offset (possibly empty).
     */
    public AnnotationSet getStartingAt(long offset) {
        if (annotsByStartNode == null) {
            indexByStartOffset();
        }
        Node exactNode = nodesByOffset.get(offset);
        if (exactNode != null) {
            return new ImmutableAnnotationSetImpl(doc, getAnnotsByStartNode(exactNode.getId()));
        }
        // no node sits exactly at this offset
        return emptyAnnotationSet;
    }

    /**
     * Return a list of annotations sorted by increasing start offset, i.e. in
     * the order they appear in the document. If more than one annotation starts
     * at a specific offset the order of these annotations is unspecified. If a
     * positional index does not exist, it is created.
     *
     * @return a new list of annotations ordered by increasing start offset
     */
    @Override
    public List<Annotation> inDocumentOrder() {
        if (annotsByStartNode == null) {
            indexByStartOffset();
        }
        List<Annotation> ordered = new ArrayList<Annotation>();
        for (Node node : nodesByOffset.values()) {
            Collection<Annotation> startingHere = getAnnotsByStartNode(node.getId());
            if (startingHere == null) {
                continue; // no annotation starts at this node
            }
            ordered.addAll(startingHere);
        }
        return ordered;
    }

    /**
     * Select annotations by offset. This returns the set of annotations that
     * overlap totally or partially with the interval defined by the two provided
     * offsets. The result will include all the annotations that either:
     * <ul>
     * <li>start before the start offset and end strictly after it</li>
     * <li>OR</li>
     * <li>start at a position between the start and the end offsets</li>
     * </ul>
     * This simply calls the typed overload with a null type, so annotations of
     * every type are considered.
     *
     * @return an ImmutableAnnotationSet
     */
    @Override
    public AnnotationSet get(Long startOffset, Long endOffset) {
        return get(null, startOffset, endOffset);
    } // get(startOffset, endOffset)

    /**
     * Select annotations by offset. The result includes exactly those
     * annotations that start at the start offset and end at the end offset.
     * If a positional index doesn't exist it is created.
     *
     * @param startOffset the offset at which the annotations must start
     * @param endOffset the offset at which the annotations must end
     * @return a new ImmutableAnnotationSetImpl over the matching annotations
     */
    public AnnotationSet getStrict(Long startOffset, Long endOffset) {
        if (annotsByStartNode == null) {
            indexByStartOffset();
        }
        // created lazily: stays null when nothing matches
        List<Annotation> exactMatches = null;
        Node startNode = nodesByOffset.get(startOffset);
        Collection<Annotation> startingHere =
                (startNode == null) ? null : getAnnotsByStartNode(startNode.getId());
        if (startingHere != null) {
            for (Annotation candidate : startingHere) {
                if (candidate.getEndNode().getOffset().compareTo(endOffset) != 0) {
                    continue; // ends somewhere else
                }
                if (exactMatches == null) {
                    exactMatches = new ArrayList<Annotation>();
                }
                exactMatches.add(candidate);
            }
        }
        return new ImmutableAnnotationSetImpl(doc, exactMatches);
    } // getStrict(startOffset, endOffset)

    /**
     * Select annotations by offset. This returns the set of annotations of the
     * given type that overlap totally or partially with the interval defined by
     * the two provided offsets. The result will include all the annotations that
     * either:
     * <ul>
     * <li>start before the start offset and end strictly after it</li>
     * <li>OR</li>
     * <li>start at or after the start offset and strictly before the end
     * offset</li>
     * </ul>
     * If a positional index doesn't exist it is created.
     *
     * @param neededType the type of annotation to return; when null or blank,
     *          annotations of every type are returned
     * @param startOffset the start of the interval
     * @param endOffset the end of the interval
     * @return an ImmutableAnnotationSet; the shared empty set when the end
     *         offset lies before the start offset
     */
    @Override
    public AnnotationSet get(String neededType, Long startOffset, Long endOffset) {
        // check the range, as getCovering and getContained do: an inverted
        // interval overlaps nothing and must not reach the sub-map lookups
        if (endOffset < startOffset)
            return emptyAnnotationSet;
        if (annotsByStartNode == null)
            indexByStartOffset();
        List<Annotation> annotationsToAdd = new ArrayList<Annotation>();
        boolean checkType = StringUtils.isNotBlank(neededType);

        // find all the annots that start strictly before the start offset and
        // end strictly after it. No annotation is longer than longestAnnot, so
        // nodes further back than that cannot hold one reaching the interval.
        Long searchStart = Math.max(0L, startOffset - longestAnnot);
        for (Node currentNode : nodesByOffset.subMap(searchStart, startOffset).values()) {
            Collection<Annotation> objFromPoint = getAnnotsByStartNode(currentNode.getId());
            if (objFromPoint == null)
                continue;
            for (Annotation currentAnnot : objFromPoint) {
                // if neededType is set, make sure this is the right type
                if (checkType && !currentAnnot.getType().equals(neededType))
                    continue;
                if (currentAnnot.getEndNode().getOffset().compareTo(startOffset) > 0)
                    annotationsToAdd.add(currentAnnot);
            }
        }

        // find all the annots that start at or after the start offset but
        // before the end offset
        for (Node currentNode : nodesByOffset.subMap(startOffset, endOffset).values()) {
            Collection<Annotation> objFromPoint = getAnnotsByStartNode(currentNode.getId());
            if (objFromPoint == null)
                continue;
            if (!checkType) {
                // no specific type requested, add all of the annots
                annotationsToAdd.addAll(objFromPoint);
                continue;
            }
            // check the type of each annot
            for (Annotation currentAnnot : objFromPoint) {
                if (currentAnnot.getType().equals(neededType))
                    annotationsToAdd.add(currentAnnot);
            }
        }
        return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
    } // get(type, startOffset, endOffset)

    /**
     * Select annotations of the given type that completely span the range.
     * Formally, for any annotation a, a will be included in the return
     * set if:
     * <ul>
     * <li>a.getStartNode().getOffset() <= startOffset</li>
     * <li>and</li>
     * <li>a.getEndNode().getOffset() >= endOffset</li>
     * </ul>
     *
     * @param neededType Type of annotation to return. If empty, all
     *          annotation types will be returned.
     * @return annotations of the given type that completely span the range.
     */
    @Override
    public AnnotationSet getCovering(String neededType, Long startOffset, Long endOffset) {
        // an inverted range cannot be covered by anything
        if (endOffset < startOffset) {
            return emptyAnnotationSet;
        }
        if (annotsByStartNode == null) {
            indexByStartOffset();
        }
        // a range longer than the longest annotation in this set cannot be
        // covered by any annotation
        if (endOffset - startOffset > longestAnnot) {
            return emptyAnnotationSet;
        }

        boolean checkType = StringUtils.isNotBlank(neededType);
        List<Annotation> covering = new ArrayList<Annotation>();

        // Candidates start at or before startOffset; the upper bound needs the
        // + 1 because the sub-map's upper bound is exclusive. The lower bound is
        // the length of the longest annot back from the end of the range, since
        // nothing that starts earlier is long enough to cover the entire span.
        long lowerBound = Math.max(0L, (endOffset - 1) - longestAnnot);
        for (Node node : nodesByOffset.subMap(lowerBound, startOffset + 1).values()) {
            Collection<Annotation> startingHere = getAnnotsByStartNode(node.getId());
            if (startingHere == null) {
                continue;
            }
            for (Annotation candidate : startingHere) {
                // if neededType is set, make sure this is the right type
                boolean typeMatches = !checkType || candidate.getType().equals(neededType);
                // keep the annot only if it ends at or after the endOffset
                if (typeMatches && candidate.getEndNode().getOffset().compareTo(endOffset) >= 0) {
                    covering.add(candidate);
                }
            }
        }
        return new ImmutableAnnotationSetImpl(doc, covering);
    } // getCovering(type, startOffset, endOffset)

    /**
     * Select annotations by type, features and offset. The annotations are
     * first selected by offset, and that result is then filtered by type and
     * feature constraints.
     *
     * @param type the annotation type to select
     * @param constraints the feature-value pairs the annotations must have
     * @param offset the offset from which to start looking
     */
    @Override
    public AnnotationSet get(String type, FeatureMap constraints, Long offset) {
        AnnotationSet atOffset = get(offset);
        return (atOffset == null)
                ? emptyAnnotationSet
                : atOffset.get(type, constraints);
    } // get(type, constraints, offset)

    /**
     * Select annotations contained within an interval, i.e.
     * those annotations whose start position is
     * >= <code>startOffset</code> and whose end position is &lt;=
     * <code>endOffset</code>. Only nodes strictly before
     * <code>endOffset</code> are examined as start positions. If a positional
     * index doesn't exist it is created.
     *
     * @return the shared empty set when the end offset lies before the start
     *         offset, otherwise a new ImmutableAnnotationSetImpl
     */
    @Override
    public AnnotationSet getContained(Long startOffset, Long endOffset) {
        // check the range
        if (endOffset < startOffset) {
            return emptyAnnotationSet;
        }
        // ensure index
        if (annotsByStartNode == null) {
            indexByStartOffset();
        }
        // created lazily: stays null when nothing is contained
        List<Annotation> contained = null;
        // visit the nodes at or after the start offset but strictly before the
        // end offset
        for (Node node : nodesByOffset.subMap(startOffset, endOffset).values()) {
            Collection<Annotation> startingHere = getAnnotsByStartNode(node.getId());
            if (startingHere == null) {
                continue;
            }
            // keep only the annotations that also end at or before endOffset
            for (Annotation candidate : startingHere) {
                if (candidate.getEndNode().getOffset().compareTo(endOffset) > 0) {
                    continue;
                }
                if (contained == null) {
                    contained = new ArrayList<Annotation>();
                }
                contained.add(candidate);
            }
        }
        return new ImmutableAnnotationSetImpl(doc, contained);
    } // getContained(startOffset, endOffset)

    /**
     * Get the node with the smallest offset.
     *
     * @return the node stored under the first key of the offset index, or null
     *         when that index is empty
     */
    @Override
    public Node firstNode() {
        indexByStartOffset();
        return nodesByOffset.isEmpty() ? null : nodesByOffset.get(nodesByOffset.firstKey());
    } // firstNode

    /**
     * Get the node with the largest offset.
     *
     * @return the node stored under the last key of the offset index, or null
     *         when that index is empty
     */
    @Override
    public Node lastNode() {
        indexByStartOffset();
        return nodesByOffset.isEmpty() ? null : nodesByOffset.get(nodesByOffset.lastKey());
    } // lastNode

    /**
     * Get the first node that is relevant for this annotation set and which has
     * the offset larger than the one of the node provided.
     *
     * @param node the node to start from
     * @return the result of looking up the next node at offset
     *         {@code node.getOffset() + 1} in the offset index
     */
    @Override
    public Node nextNode(Node node) {
        indexByStartOffset();
        // Long.valueOf replaces the deprecated Long(long) constructor
        return nodesByOffset.getNextOf(Long.valueOf(node.getOffset().longValue() + 1));
    }

    /**
     * Factory shared by all annotation sets for creating annotation objects.
     * It is assigned through {@link #setAnnotationFactory(AnnotationFactory)}
     * and receives a {@link DefaultAnnotationFactory} from the static
     * initialiser of this class.
     */
    protected static AnnotationFactory annFactory;

    /**
     * Replaces the factory that the add methods of this class use to create
     * annotation objects. Unless changed, the factory is a
     * {@link DefaultAnnotationFactory}.
     *
     * @param newFactory
     *          the factory to use from now on
     */
    public static void setAnnotationFactory(AnnotationFactory newFactory) {
        AnnotationSetImpl.annFactory = newFactory;
    }

    static {
        // install the default factory when the class is loaded
        annFactory = new DefaultAnnotationFactory();
    }

    /**
     * Creates an annotation between two already existing nodes, adds it to this
     * set and returns the id it was given.
     * <B>Only nodes obtained from this very annotation set should be passed in.
     * Nodes taken from other annotation sets may lead to undefined behaviour; if
     * in doubt use the Long based add method instead.</B>
     *
     * @param start
     *          the start node of the new annotation
     * @param end
     *          the end node of the new annotation
     * @param type
     *          the annotation type
     * @param features
     *          the features of the new annotation
     * @return the id taken from the document for the new annotation
     */
    @Override
    public Integer add(Node start, Node end, String type, FeatureMap features) {
        // ask the document for a fresh annotation id
        final Integer newId = doc.getNextAnnotationId();
        // let the factory build the annotation inside this set
        annFactory.createAnnotationInSet(this, newId, start, end, type, features);
        return newId;
    } // add(Node, Node, String, FeatureMap)

    /**
     * Adds an existing annotation object to this set, keeping its id. The
     * annotation is stored under its id, registered in whichever of the type and
     * start-offset indices currently exist, and an ANNOTATION_ADDED event is
     * then fired to the annotation set listeners and the gate listeners.
     *
     * @param a
     *          the annotation to add
     * @return <tt>true</tt> unless the very same object was already stored under
     *         that id
     */
    @Override
    public boolean add(Annotation a) throws ClassCastException {
        Object previous = annotsById.put(a.getId(), a);
        boolean modified = previous != a;
        if (annotsByType != null) {
            addToTypeIndex(a);
        }
        if (annotsByStartNode != null) {
            addToStartOffsetIndex(a);
        }
        AnnotationSetEvent addedEvent = new AnnotationSetEvent(this, AnnotationSetEvent.ANNOTATION_ADDED, doc,
                a);
        fireAnnotationAdded(addedEvent);
        fireGateEvent(addedEvent);
        return modified;
    } // add(o)

    /**
     * Adds multiple annotations to this set in one go. For every annotation in
     * the provided collection a new annotation is created through
     * {@link #add(Long, Long, String, FeatureMap)} using the offsets, type and
     * feature map of the original. The new annotations will have different IDs
     * from the old ones (which is required in order to preserve the uniqueness
     * of IDs inside an annotation set).
     *
     * @param c
     *          a collection of annotations
     * @return <tt>true</tt> if the set has been modified as a result of this
     *         call, i.e. if at least one annotation was added.
     * @throws IllegalArgumentException
     *           if the offsets of one of the annotations are rejected by this
     *           set; the original {@link InvalidOffsetException} is attached as
     *           the cause
     */
    @Override
    public boolean addAll(Collection<? extends Annotation> c) {
        boolean changed = false;
        for (Annotation a : c) {
            try {
                add(a.getStartNode().getOffset(), a.getEndNode().getOffset(), a.getType(), a.getFeatures());
                changed = true;
            } catch (InvalidOffsetException ioe) {
                // same message as before, but keep the original exception as the
                // cause so that its stack trace is not lost
                throw new IllegalArgumentException(ioe.toString(), ioe);
            }
        }
        return changed;
    }

    /**
     * Adds every annotation of the given collection to this set through
     * {@link #add(Annotation)}. Unlike addAll() no copies are created, so the
     * annotations keep their IDs. It is intended to be used solely by annotation
     * sets in order to construct the results for various get(...) methods.
     *
     * @param c
     *          a collection of annotations
     * @return <tt>true</tt> if the set has been modified as a result of this
     *         call.
     */
    protected boolean addAllKeepIDs(Collection<? extends Annotation> c) {
        boolean anyAdded = false;
        for (Annotation candidate : c) {
            // add is always invoked; only the flag depends on its result
            if (add(candidate)) {
                anyAdded = true;
            }
        }
        return anyAdded;
    }

    /**
     * Returns the start and end nodes for the given pair of offsets. Nodes
     * already present in the offset index are reused, missing ones are created
     * with ids taken from the document. When both offsets are equal the same
     * node is returned in both positions. This method does not itself put newly
     * created nodes into the offset index.
     *
     * @param start
     *          the start offset
     * @param end
     *          the end offset
     * @return a two element array holding the start node and the end node
     * @throws InvalidOffsetException
     *           if the document reports the offset range as invalid
     **/
    private final Node[] getNodes(Long start, Long end) throws InvalidOffsetException {
        // reject ranges the document does not accept
        if (!doc.isValidOffsetRange(start, end)) {
            throw new InvalidOffsetException("Offsets [" + start + ":" + end
                    + "] not valid for this document of size " + doc.getContent().size());
        }
        // the positional index is required to find out whether nodes exist
        // already or have to be created
        if (nodesByOffset == null) {
            indexByStartOffset();
        }

        Node first = nodesByOffset.get(start);
        if (first == null) {
            first = new NodeImpl(doc.getNextNodeId(), start);
        }

        Node second;
        if (start.equals(end)) {
            // zero length span: both ends share one node
            second = first;
        } else {
            second = nodesByOffset.get(end);
            if (second == null) {
                second = new NodeImpl(doc.getNextNodeId(), end);
            }
        }
        return new Node[] { first, second };
    }

    /**
     * Creates an annotation spanning the given offsets, adds it to this set and
     * returns its id.
     *
     * @throws InvalidOffsetException
     *           if the offsets are not accepted when the nodes are looked up
     */
    @Override
    public Integer add(Long start, Long end, String type, FeatureMap features) throws InvalidOffsetException {
        Node[] endpoints = getNodes(start, end);
        Node startNode = endpoints[0];
        Node endNode = endpoints[1];
        // the node based variant does the actual work
        return add(startNode, endNode, type, features);
    } // add(start, end, type, features)

    /**
     * Create and add an annotation whose id is already known, e.g. because it
     * was previously fetched from a database.
     * <p>
     * After the annotation has been created the annotation id counter of the
     * document is moved past the supplied id whenever the supplied id is equal
     * to or larger than the id the document would hand out next, so that ids
     * generated later do not clash with it. (This assumes that
     * <tt>peakAtNextAnnotationId()</tt> reports the id that will be handed out
     * next.)
     *
     * @param id
     *          the id to give to the new annotation
     * @param start
     *          the start offset
     * @param end
     *          the end offset
     * @param type
     *          the annotation type
     * @param features
     *          the features of the new annotation
     * @throws InvalidOffsetException
     *           if the offsets are not accepted when the nodes are looked up
     */
    @Override
    public void add(Integer id, Long start, Long end, String type, FeatureMap features)
            throws InvalidOffsetException {
        Node[] nodes = getNodes(start, end);
        // construct an annotation
        annFactory.createAnnotationInSet(this, id, nodes[0], nodes[1], type, features);

        // try to ensure that if someone adds an annotation directly by ID
        // the other methods don't trample all over it later; the comparison
        // has to include equality, otherwise the very next generated id would
        // be the one that has just been used
        if (id >= doc.peakAtNextAnnotationId()) {
            doc.setNextAnnotationId(id + 1);
        }
    } // add(id, start, end, type, features)

    /**
     * Construct the index by annotation type. Does nothing if that index exists
     * already; otherwise every annotation currently in the set is added to a
     * freshly created index.
     */
    protected void indexByType() {
        if (annotsByType == null) {
            annotsByType = new HashMap<String, AnnotationSet>(Gate.HASH_STH_SIZE);
            for (Annotation current : annotsById.values()) {
                addToTypeIndex(current);
            }
        }
    } // indexByType()

    /**
     * Construct the positional indices for annotation start. Does nothing when
     * the index by start node exists already. Otherwise the node-by-offset map
     * is created if missing, a new index by start node is allocated and all
     * annotations currently in the set are entered into it.
     */
    protected void indexByStartOffset() {
        if (annotsByStartNode == null) {
            if (nodesByOffset == null) {
                nodesByOffset = new RBTreeMap<Long, Node>();
            }
            annotsByStartNode = new HashMap<Integer, Object>(annotsById.size());
            for (Annotation current : annotsById.values()) {
                addToStartOffsetIndex(current);
            }
        }
    } // indexByStartOffset()

    /**
     * Registers an annotation in the index by type, creating the per-type
     * annotation set on first use. Does nothing if the index doesn't exist.
     */
    void addToTypeIndex(Annotation a) {
        if (annotsByType != null) {
            String typeName = a.getType();
            AnnotationSet bucket = annotsByType.get(typeName);
            if (bucket == null) {
                // first annotation of this type: start a new set for it
                bucket = new AnnotationSetImpl(doc);
                annotsByType.put(typeName, bucket);
            }
            bucket.add(a);
        }
    } // addToTypeIndex(a)

    /**
     * Add an annotation to the start offset index.
     * <p>
     * The nodes of the annotation are entered into the node-by-offset map (when
     * that map exists) and the longest annotation length seen so far is updated
     * regardless of whether the index by start node exists. The index by start
     * node itself is only touched when it exists. An entry of that index is
     * either a single Annotation or, once a second annotation starts at the
     * same node, a Set of Annotations.
     */
    @SuppressWarnings("unchecked")
    void addToStartOffsetIndex(Annotation a) {
        Node from = a.getStartNode();
        Node to = a.getEndNode();
        Long fromOffset = from.getOffset();
        Long toOffset = to.getOffset();
        // make both nodes reachable through their offsets
        if (nodesByOffset != null) {
            nodesByOffset.put(fromOffset, from);
            nodesByOffset.put(toOffset, to);
        }

        // keep track of the longest annotation
        long span = toOffset - fromOffset;
        if (span > longestAnnot) {
            longestAnnot = span;
        }

        // without an index by start node there is nothing more to do
        if (annotsByStartNode == null) {
            return;
        }

        Integer startId = from.getId();
        Object existing = annotsByStartNode.get(startId);
        if (existing == null) {
            // nothing starts here yet: store the bare annotation
            annotsByStartNode.put(startId, a);
            return;
        }
        if (existing instanceof Annotation) {
            // the same annotation is already indexed
            if (existing.equals(a)) {
                return;
            }
            // a second annotation starts at this node: switch to a set
            Set<Annotation> promoted = new HashSet<Annotation>(3);
            promoted.add((Annotation) existing);
            annotsByStartNode.put(startId, promoted);
            promoted.add(a);
            return;
        }
        // the entry is already a set
        ((Set<Annotation>) existing).add(a);
    } // addToStartOffsetIndex(a)

    /**
     * Propagate document content changes to this AnnotationSet.
     * <p>
     * This method is called for all annotation sets of a document from
     * DocumentImpl.edit to adapt the annotations to the text changes made through
     * the edit. It works in two phases:
     * <ol>
     * <li>If the edited range is not empty (<tt>end &gt; start</tt>), all nodes
     * with an offset in <tt>[start, end]</tt> are collapsed onto the first of
     * them, which is moved to <tt>start</tt>. Annotations starting or ending at
     * one of those nodes are re-attached to that node and annotations that
     * thereby end up with identical start and end nodes are removed from the
     * set.</li>
     * <li>All nodes at or after <tt>start</tt> are shifted by the difference
     * between the size of the replacement and the size of the edited range, and
     * the node-by-offset index is updated accordingly.</li>
     * </ol>
     * The behaviour of this method is influenced by the configuration
     * setting {@link gate.GateConstants#DOCEDIT_INSERT_PREPEND GateConstants.DOCEDIT_INSERT_PREPEND }:
     * annotations immediately
     * ending before or starting after the point of insertion will either become
     * part of the inserted text or not. Currently it works like this:
     * <ul>
     * <li>PREPEND=true: annotation before will become part, annotation after not</li>
     * <li>PREPEND=false: annotation before will not become part, annotation after
     * will become part</li>
     * </ul>
     * NOTE 1 (JP): There is another setting
     * {@link gate.GateConstants#DOCEDIT_INSERT_APPEND GateConstants.DOCEDIT_INSERT_APPEND }
     * but
     * this setting does currently not influence the behaviour of this method.
     * The behaviour of this method may change in the future so that
     * DOCEDIT_INSERT_APPEND is considered separately and in addition to
     * DOCEDIT_INSERT_PREPEND so that it can be controlled independently if
     * the annotation before and/or after an insertion point gets expanded or not.
     * <p>
     * NOTE 2: This method has, unfortunately, to be
     * public, to allow DocumentImpls to get at it. Oh for a "friend" declaration.
     * Doesn't throw InvalidOffsetException as DocumentImpl is the only client,
     * and that checks the offsets before calling this method.
     *
     * @param start
     *          the offset where the edited range begins
     * @param end
     *          the offset where the edited range ends
     * @param replacement
     *          the content replacing the range; <tt>null</tt> is treated as
     *          content of size 0
     */
    public void edit(Long start, Long end, DocumentContent replacement) {
        // make sure we have the indices computed
        indexByStartOffset();
        if (end.compareTo(start) > 0) {
            // get the nodes that need to be processed (the nodes internal to
            // the removed section plus the marginal ones); the upper bound is
            // end + 1 so that a node located exactly at end is included
            List<Node> affectedNodes = new ArrayList<Node>(
                    nodesByOffset.subMap(start, new Long(end.longValue() + 1)).values());
            // if we have more than 1 node we need to delete all apart from
            // the first and move the annotations so that they refer to the
            // one we keep (the first)
            NodeImpl firstNode = null;
            if (!affectedNodes.isEmpty()) {
                firstNode = (NodeImpl) affectedNodes.get(0);
                List<Annotation> startingAnnotations = new ArrayList<Annotation>();
                List<Annotation> endingAnnotations = new ArrayList<Annotation>();
                // now we need to find all the annotations ending in the zone;
                // as annotations are only indexed by start node, every node
                // from offset 0 up to and including end is inspected
                List<Node> beforeNodes = new ArrayList<Node>(
                        nodesByOffset.subMap(new Long(0), new Long(end.longValue() + 1)).values());
                Iterator<Node> beforeNodesIter = beforeNodes.iterator();
                while (beforeNodesIter.hasNext()) {
                    Node currentNode = beforeNodesIter.next();
                    Collection<Annotation> annotations = getAnnotsByStartNode(currentNode.getId());
                    if (annotations == null)
                        continue;
                    // iterates on the annotations in this set
                    Iterator<Annotation> localIterator = annotations.iterator();
                    while (localIterator.hasNext()) {
                        Annotation annotation = localIterator.next();
                        long offsetEndAnnotation = annotation.getEndNode().getOffset().longValue();
                        // we are interested only in the annotations ending
                        // inside the zone
                        if (offsetEndAnnotation >= start.longValue() && offsetEndAnnotation <= end.longValue())
                            endingAnnotations.add(annotation);
                    }
                }
                // collect the annotations starting at any affected node other
                // than the first one (index 0 is the node that is kept)
                for (int i = 1; i < affectedNodes.size(); i++) {
                    Node aNode = affectedNodes.get(i);
                    Collection<Annotation> annSet = getAnnotsByStartNode(aNode.getId());
                    if (annSet != null) {
                        startingAnnotations.addAll(annSet);
                    }
                    // remove the node
                    // nodesByOffset.remove(aNode.getOffset());
                    // annotsByStartNode.remove(aNode);
                }
                // modify the annotations so they point to the saved node
                Iterator<Annotation> annIter = startingAnnotations.iterator();
                while (annIter.hasNext()) {
                    AnnotationImpl anAnnot = (AnnotationImpl) annIter.next();
                    anAnnot.start = firstNode;
                    // remove the modified annotation if it has just become
                    // zero-length, otherwise index it under its new start node
                    if (anAnnot.start == anAnnot.end) {
                        remove(anAnnot);
                    } else {
                        addToStartOffsetIndex(anAnnot);
                    }
                }
                annIter = endingAnnotations.iterator();
                while (annIter.hasNext()) {
                    AnnotationImpl anAnnot = (AnnotationImpl) annIter.next();
                    anAnnot.end = firstNode;
                    // remove the modified annotation if it has just become
                    // zero-length
                    if (anAnnot.start == anAnnot.end) {
                        remove(anAnnot);
                    }
                }
                // remove the unused nodes inside the area
                for (int i = 1; i < affectedNodes.size(); i++) {
                    Node aNode = affectedNodes.get(i);
                    nodesByOffset.remove(aNode.getOffset());
                    annotsByStartNode.remove(aNode.getId());
                }
                // repair the first node
                // remove from offset index
                nodesByOffset.remove(firstNode.getOffset());
                // change the offset for the saved node
                firstNode.setOffset(start);
                // add back to the offset index
                nodesByOffset.put(firstNode.getOffset(), firstNode);
            }
        }
        // now handle the insert and/or update the rest of the nodes'
        // position
        // get the user selected behaviour (defaults to append)
        boolean shouldPrepend = Gate.getUserConfig().getBoolean(GateConstants.DOCEDIT_INSERT_PREPEND)
                .booleanValue();
        long s = start.longValue(), e = end.longValue();
        long rlen = // length of the replacement value
                ((replacement == null) ? 0 : replacement.size().longValue());
        // update the offsets and the index by offset for the rest of the
        // nodes; the nodes are copied into a list first because the index is
        // modified below
        List<Node> nodesAfterReplacement = new ArrayList<Node>(nodesByOffset.tailMap(start).values());
        // remove from the index by offset
        Iterator<Node> nodesAfterReplacementIter = nodesAfterReplacement.iterator();
        while (nodesAfterReplacementIter.hasNext()) {
            NodeImpl n = (NodeImpl) nodesAfterReplacementIter.next();
            nodesByOffset.remove(n.getOffset());
        }
        // change the offsets
        nodesAfterReplacementIter = nodesAfterReplacement.iterator();
        while (nodesAfterReplacementIter.hasNext()) {
            NodeImpl n = (NodeImpl) nodesAfterReplacementIter.next();
            long oldOffset = n.getOffset().longValue();
            // by default we shift all nodes by the change in length
            // (replacement length minus length of the edited range)
            long newOffset = oldOffset - (e - s) + rlen;
            // for the first node we need behave differently
            if (oldOffset == s) {
                // the first offset never moves back
                if (newOffset < s)
                    newOffset = s;
                // if we're prepending we don't move forward
                if (shouldPrepend)
                    newOffset = s;
            }
            n.setOffset(new Long(newOffset));
        }
        // add back to the index by offset with the new offsets
        nodesAfterReplacementIter = nodesAfterReplacement.iterator();
        while (nodesAfterReplacementIter.hasNext()) {
            NodeImpl n = (NodeImpl) nodesAfterReplacementIter.next();
            nodesByOffset.put(n.getOffset(), n);
        }
        // //rebuild the indices with the new offsets
        // nodesByOffset = null;
        // annotsByStartNode = null;
        // annotsByEndNode = null;
        // indexByStartOffset();
        // indexByEndOffset();
    } // edit(start,end,replacement)

    /**
     * Returns the name of this annotation set.
     *
     * @return the value of the name field
     */
    @Override
    public String getName() {
        return this.name;
    }

    /**
     * Returns the document this annotation set is attached to.
     *
     * @return the value of the doc field
     */
    @Override
    public Document getDocument() {
        return this.doc;
    }

    /**
     * Returns the names of all annotation types present in the index by type,
     * which is built first if it does not exist yet.
     *
     * @return an unmodifiable view of the key set of the type index
     */
    @Override
    public Set<String> getAllTypes() {
        indexByType();
        Set<String> typeNames = annotsByType.keySet();
        return Collections.unmodifiableSet(typeNames);
    }

    /**
     * Returns the annotations indexed under the given start node id. The values
     * of the annotsByStartNode map are either a single Annotation or a
     * Collection of Annotations; this helper hides that difference. A single
     * annotation is wrapped in a new list, a stored collection is returned as it
     * is (not copied).
     *
     * @param id
     *          the id of the start node
     * @return the annotations, or <tt>null</tt> if nothing is indexed under
     *         that id
     */
    @SuppressWarnings("unchecked")
    private final Collection<Annotation> getAnnotsByStartNode(Integer id) {
        Object stored = annotsByStartNode.get(id);
        if (stored instanceof Annotation) {
            List<Annotation> single = new ArrayList<Annotation>(2);
            single.add((Annotation) stored);
            return single;
        }
        // either an existing collection or null (casting null yields null)
        return (Collection<Annotation>) stored;
    }

    /**
     * Creates a copy of this set by delegating to the clone implementation of
     * the superclass.
     *
     * @return a clone of this set.
     * @throws CloneNotSupportedException
     *           if the superclass implementation throws it
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        Object copy = super.clone();
        return copy;
    }

    /**
     * Unregisters an annotation set listener. The removal is carried out on a
     * clone of the listener vector which then replaces the field, so the vector
     * instance previously referenced by the field is not modified. Nothing
     * happens if the listener is not registered.
     */
    @Override
    public synchronized void removeAnnotationSetListener(AnnotationSetListener l) {
        if (annotationSetListeners == null || !annotationSetListeners.contains(l)) {
            return;
        }
        @SuppressWarnings("unchecked")
        Vector<AnnotationSetListener> remaining = (Vector<AnnotationSetListener>) annotationSetListeners
                .clone();
        remaining.remove(l);
        annotationSetListeners = remaining;
    }

    /**
     * Registers an annotation set listener. The listener is appended to a clone
     * of the current listener vector (or to a new vector if there is none) which
     * then replaces the field. Nothing happens if the listener is registered
     * already.
     */
    @Override
    public synchronized void addAnnotationSetListener(AnnotationSetListener l) {
        if (annotationSetListeners != null && annotationSetListeners.contains(l)) {
            return;
        }
        @SuppressWarnings("unchecked")
        Vector<AnnotationSetListener> extended = annotationSetListeners != null
                ? (Vector<AnnotationSetListener>) annotationSetListeners.clone()
                : new Vector<AnnotationSetListener>(2);
        extended.addElement(l);
        annotationSetListeners = extended;
    }

    /**
     * Passes the event to the annotationAdded method of every registered
     * annotation set listener, in registration order. The listener vector is
     * read once into a local variable before the loop starts.
     */
    protected void fireAnnotationAdded(AnnotationSetEvent e) {
        Vector<AnnotationSetListener> snapshot = annotationSetListeners;
        if (snapshot == null) {
            return;
        }
        for (int idx = 0, total = snapshot.size(); idx < total; idx++) {
            snapshot.get(idx).annotationAdded(e);
        }
    }

    /**
     * Passes the event to the annotationRemoved method of every registered
     * annotation set listener, in registration order. The listener vector is
     * read once into a local variable before the loop starts.
     */
    protected void fireAnnotationRemoved(AnnotationSetEvent e) {
        Vector<AnnotationSetListener> snapshot = annotationSetListeners;
        if (snapshot == null) {
            return;
        }
        for (int idx = 0, total = snapshot.size(); idx < total; idx++) {
            snapshot.get(idx).annotationRemoved(e);
        }
    }

    /**
     * Unregisters a gate listener. The removal is carried out on a clone of the
     * listener vector which then replaces the field, so the vector instance
     * previously referenced by the field is not modified. Nothing happens if the
     * listener is not registered.
     */
    @Override
    public synchronized void removeGateListener(GateListener l) {
        if (gateListeners == null || !gateListeners.contains(l)) {
            return;
        }
        @SuppressWarnings("unchecked")
        Vector<GateListener> remaining = (Vector<GateListener>) gateListeners.clone();
        remaining.remove(l);
        gateListeners = remaining;
    }

    /**
     * Registers a gate listener. The listener is appended to a clone of the
     * current listener vector (or to a new vector if there is none) which then
     * replaces the field. Nothing happens if the listener is registered already.
     */
    @Override
    public synchronized void addGateListener(GateListener l) {
        if (gateListeners != null && gateListeners.contains(l)) {
            return;
        }
        @SuppressWarnings("unchecked")
        Vector<GateListener> extended = gateListeners != null ? (Vector<GateListener>) gateListeners.clone()
                : new Vector<GateListener>(2);
        extended.addElement(l);
        gateListeners = extended;
    }

    /**
     * Passes the event to the processGateEvent method of every registered gate
     * listener, in registration order. The listener vector is read once into a
     * local variable before the loop starts.
     */
    protected void fireGateEvent(GateEvent e) {
        Vector<GateListener> snapshot = gateListeners;
        if (snapshot == null) {
            return;
        }
        for (int idx = 0, total = snapshot.size(); idx < total; idx++) {
            snapshot.get(idx).processGateEvent(e);
        }
    }

    /**
     * Custom serialisation. The indices are not written as they can be rebuilt
     * from the annotations. The stream receives, in this order: the persistent
     * fields "name", "doc" and "annotations" (the annotations as an array), then
     * two booleans telling whether the type index and the start node index
     * existed at the time of writing.
     */
    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
        ObjectOutputStream.PutField fields = out.putFields();
        fields.put("name", this.name);
        fields.put("doc", this.doc);
        // the annotations go out as an array, held in the annotations field
        // only for the duration of the write
        this.annotations = this.annotsById.values().toArray(new Annotation[this.annotsById.size()]);
        fields.put("annotations", this.annotations);
        out.writeFields();
        this.annotations = null;
        // remember which indices existed
        out.writeBoolean(this.annotsByType != null);
        out.writeBoolean(this.annotsByStartNode != null);
    }

    /**
     * Custom deserialisation. Two layouts are understood: the current one, in
     * which the annotations are stored as an array followed by two booleans
     * describing which indices existed, and an older one that stored the
     * "annotsById" map instead. After the fields have been read the id map is
     * recreated, empty indices are allocated where the flags ask for them and
     * all annotations are added again through {@link #add(Annotation)}.
     *
     * @throws IOException
     *           if neither the annotations array nor the map by id is present
     */
    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
        this.longestAnnot = 0L;
        ObjectInputStream.GetField fields = in.readFields();
        this.name = (String) fields.get("name", null);
        this.doc = (DocumentImpl) fields.get("doc", null);
        this.annotations = (Annotation[]) fields.get("annotations", null);

        boolean typeIndexed = false;
        boolean startNodeIndexed = false;
        if (this.annotations != null) {
            // new style serialised version: the index flags follow the fields
            typeIndexed = in.readBoolean();
            startNodeIndexed = in.readBoolean();
        } else {
            // old style serialised version: fall back to the map by id
            @SuppressWarnings("unchecked")
            Map<Integer, Annotation> legacyMap = (Map<Integer, Annotation>) fields.get("annotsById", null);
            if (legacyMap == null) {
                throw new IOException(
                        "Invalid serialised data: neither annotations array or map by id" + " are present.");
            }
            this.annotations = legacyMap.values().toArray(new Annotation[] {});
        }

        this.annotsById = new HashMap<Integer, Annotation>(this.annotations.length);
        // allocate the indices that existed when the set was written; they are
        // filled by the add calls below
        if (typeIndexed) {
            this.annotsByType = new HashMap<String, AnnotationSet>(Gate.HASH_STH_SIZE);
        }
        if (startNodeIndexed) {
            this.nodesByOffset = new RBTreeMap<Long, Node>();
            this.annotsByStartNode = new HashMap<Integer, Object>(this.annotations.length);
        }
        // add all the annotations one by one
        for (Annotation restored : this.annotations) {
            add(restored);
        }
        this.annotations = null;
    }

    /**
     * Returns the relation set belonging to this annotation set, creating it on
     * the first call.
     */
    @Override
    public RelationSet getRelations() {
        if (relations != null) {
            return relations;
        }
        relations = new RelationSet(this);
        return relations;
    }
} // AnnotationSetImpl