edu.stanford.nlp.trees.Tree.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.trees.Tree.java

Source

package edu.stanford.nlp.trees;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.stream.Collectors;
import java.util.*;
import java.util.function.Function;
import java.util.function.Predicate;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasIndex;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.LabelFactory;
import edu.stanford.nlp.ling.LabeledWord;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.util.logging.Redwood;

/**
 * The abstract class {@code Tree} is used to collect all of the
 * tree types, and acts as a generic extensible type.  This is the
 * standard implementation of inheritance-based polymorphism.
 * All {@code Tree} objects support accessors for their children (a
 * {@code Tree[]}), their label (a {@code Label}), and their
 * score (a {@code double}).  However, different concrete
 * implementations may or may not include the latter two, in which
 * case a default value is returned.  The class Tree defines no data
 * fields.  The two abstract methods that must be implemented are:
 * {@code children()}, and {@code treeFactory()}.  Note
 * that {@code setChildren(Tree[])} is now an optional
 * operation, whereas it was previously required to be
 * implemented. There is now support for finding the parent of a
 * tree.  This may be done by search from a tree root, or via a
 * directly stored parent.  The {@code Tree} class now
 * implements the {@code Collection} interface: in terms of
 * this, each <i>node</i> of the tree is an element of the
 * collection; hence one can explore the tree by using the methods of
 * this interface.  A {@code Tree} is regarded as a read-only
 * {@code Collection} (even though the {@code Tree} class
 * has various methods that modify trees).  Moreover, the
 * implementation is <i>not</i> thread-safe: no attempt is made to
 * detect and report concurrent modifications.
 *
 * @author Christopher Manning
 * @author Dan Klein
 * @author Sarah Spikes (sdspikes@cs.stanford.edu) - filled in types
 */
public abstract class Tree extends AbstractCollection<Tree> implements Label, Labeled, Scored, Serializable {

    /** Redwood logging channels for this class. */
    private static Redwood.RedwoodChannels log = Redwood.channels(Tree.class);

    /** Version identifier used by Java serialization (the class implements {@code Serializable}). */
    private static final long serialVersionUID = 5441849457648722744L;

    /**
     * A shared zero-length {@code Tree} array.
     * A leaf node should have a zero-length array for its
     * children. For efficiency, classes can use this array as a
     * return value for children() for leaf nodes if desired.
     * This can also be used elsewhere when you want an empty Tree array.
     */
    public static final Tree[] EMPTY_TREE_ARRAY = new Tree[0];

    /** No-argument constructor; it performs no initialization of its own. */
    public Tree() {
    }

    /**
     * Reports whether this node is a leaf, i.e. whether it has no children.
     * The answer is derived from {@link #numChildren()}, so it works for any
     * {@code Tree} whose {@code children()} returns a zero-length array for
     * leaves.
     *
     * @return {@code true} if this node has zero children
     */
    public boolean isLeaf() {
        final int kidCount = numChildren();
        return kidCount == 0;
    }

    /**
     * Counts the direct children of this node (its local tree only;
     * descendants further down are not counted).
     *
     * @return The length of the array returned by {@code children()}
     */
    public int numChildren() {
        final Tree[] daughters = children();
        return daughters.length;
    }

    /**
     * Reports whether this node has exactly one child.
     *
     * @return {@code true} if the node heads a unary rewrite
     */
    public boolean isUnaryRewrite() {
        final int kidCount = numChildren();
        return kidCount == 1;
    }

    /**
     * Reports whether this node is a preterminal: a node with exactly one
     * child, where that child is a leaf.
     *
     * @return {@code true} if the node is a preterminal; {@code false} otherwise
     */
    public boolean isPreTerminal() {
        final Tree[] daughters = children();
        if (daughters.length != 1) {
            return false;
        }
        return daughters[0].isLeaf();
    }

    /**
     * Reports whether every child of this node is a preterminal (a node with
     * one child which is itself a leaf).  A node without children is never a
     * prepreterminal.
     *
     * @return {@code true} if the node has at least one child and all of its
     *         children are preterminals; {@code false} otherwise
     */
    public boolean isPrePreTerminal() {
        final Tree[] daughters = children();
        // allMatch stops at the first non-preterminal child, like an early-exit loop
        return daughters.length > 0 && Arrays.stream(daughters).allMatch(Tree::isPreTerminal);
    }

    /**
     * Reports whether this node is phrasal, meaning it is neither a leaf nor
     * a preterminal.  Equivalently: it has two or more children, or it has a
     * single child that is not a leaf.
     *
     * @return {@code true} if the node is phrasal; {@code false} otherwise
     */
    public boolean isPhrasal() {
        final Tree[] daughters = children();
        if (daughters == null || daughters.length == 0) {
            return false;
        }
        // a single leaf child would make this node a preterminal rather than a phrase
        return daughters.length > 1 || !daughters[0].isLeaf();
    }

    /**
     * Reports whether the tree rooted here is binary: every node in it is a
     * leaf, a preterminal, or has exactly two children which are themselves
     * roots of binary trees.
     *
     * @return {@code true} if this node and all of its descendants satisfy
     *         the binary condition
     */
    public boolean isBinary() {
        if (!isLeaf() && !isPreTerminal()) {
            final Tree[] daughters = children();
            return daughters.length == 2 && daughters[0].isBinary() && daughters[1].isBinary();
        }
        // leaves and preterminals are trivially binary
        return true;
    }

    /**
     * Structural equality for trees.  Two {@code Tree} objects are equal when
     * their {@code value()}s are equal (two {@code null} values count as
     * equal), they have the same number of children, and corresponding
     * children are themselves equal.
     *
     * @param o The object to compare with
     * @return {@code true} if {@code o} is a {@code Tree} with the same value
     *         and pairwise-equal children
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Tree)) {
            return false;
        }
        final Tree other = (Tree) o;

        final String mine = this.value();
        final String theirs = other.value();
        final boolean sameValue = (mine == null) ? (theirs == null) : mine.equals(theirs);
        if (!sameValue) {
            return false;
        }

        final Tree[] myKids = children();
        final Tree[] theirKids = other.children();
        final int count = myKids.length;
        if (count != theirKids.length) {
            return false;
        }
        // walk both child arrays in step, stopping at the first mismatch
        int idx = 0;
        while (idx < count && myKids[idx].equals(theirKids[idx])) {
            idx++;
        }
        return idx == count;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}.  The hash combines
     * this node's value with the values of its direct children only; deeper
     * descendants do not contribute.  A {@code null} value at this node
     * contributes 1, and a {@code null} value at child {@code i} contributes
     * {@code i}.
     *
     * @return The hash code
     */
    @Override
    public int hashCode() {
        final String ownValue = this.value();
        int hash = (ownValue != null) ? ownValue.hashCode() : 1;
        int position = 0;
        for (Tree daughter : children()) {
            final String kidValue = daughter.value();
            final int kidHash = (kidValue != null) ? kidValue.hashCode() : position;
            // shift by the child's position so sibling order affects the result
            hash ^= (kidHash << position);
            position++;
        }
        return hash;
    }

    /**
     * Finds the position of the given tree among this node's direct children,
     * comparing by object identity ({@code ==}) rather than
     * {@code equals()}.  Identity is used because the usual question is
     * "which child of this parent is this particular node?", and two distinct
     * subtrees may well be structurally equal.
     *
     * @param tree The tree to look for in the children list
     * @return The index of the first child that is the same object as
     *         {@code tree}, or -1 if there is none
     */
    public int objectIndexOf(Tree tree) {
        final Tree[] daughters = children();
        int idx = 0;
        while (idx < daughters.length) {
            if (daughters[idx] == tree) {
                return idx;
            }
            idx++;
        }
        return -1;
    }

    /**
     * Returns an array of children for the current node.  If there
     * are no children (if the node is a leaf), this must return a
     * Tree[] array of length 0; returning {@code null} is no longer
     * supported (it once was, for leaves).  Other methods of this class
     * read {@code children().length} without a null check and rely on this.
     * A caller may assume that either {@code isLeaf()} returns
     * true, or this node has a nonzero number of children.
     *
     * @return The children of the node; never {@code null}
     * @see #getChildrenAsList()
     */
    public abstract Tree[] children();

    /**
     * Returns the children of this node as a freshly allocated, mutable
     * {@code List}.  The list is never {@code null}; for a node without
     * children it is empty.  The list structure is new, so adding to or
     * removing from it does not affect this tree, but the elements are the
     * child nodes themselves (not copies), so changes made to a child are
     * visible in the tree.
     *
     * @return A new list holding the children of the node
     */
    public List<Tree> getChildrenAsList() {
        final Tree[] daughters = children();
        final List<Tree> fixedSizeView = Arrays.asList(daughters);
        // copy out of the array-backed view so the result is independently resizable
        return new ArrayList<>(fixedSizeView);
    }

    /**
     * Set the children of this node to be the children given in the
     * array.  This is an <b>optional</b> operation; this base implementation
     * always throws {@code UnsupportedOperationException}, so subclasses
     * that support it must override it.  Note for subclasses that if there
     * are no children, the children() method must return a Tree[] array of
     * length 0.  This class provides a
     * {@code EMPTY_TREE_ARRAY} canonical zero-length Tree[] array
     * to represent zero children, but it is <i>not</i> required that
     * leaf nodes use this particular zero-length array to represent
     * a leaf node.
     *
     * @param children The array of children, each a {@code Tree}
     * @throws UnsupportedOperationException always, in this base implementation
     * @see #setChildren(List)
     */
    public void setChildren(Tree[] children) {
        throw new UnsupportedOperationException();
    }

    /**
     * Sets the children of this node from a list.  The list is converted to
     * a {@code Tree[]} and handed to {@link #setChildren(Tree[])}.  A
     * {@code null} or empty list yields a node with no children, represented
     * by the shared {@code EMPTY_TREE_ARRAY}.  Subclasses that store their
     * children in a {@code List} should override this method.
     *
     * @param childTreesList A list of trees to become children of the node.
     *          The list itself is not retained (its contents are copied into
     *          a new array), but the individual children are retained and
     *          not copied.
     * @see #setChildren(Tree[])
     */
    public void setChildren(List<? extends Tree> childTreesList) {
        final boolean noKids = (childTreesList == null) || childTreesList.isEmpty();
        final Tree[] kidsArray = noKids
                ? EMPTY_TREE_ARRAY
                : childTreesList.toArray(new Tree[childTreesList.size()]);
        setChildren(kidsArray);
    }

    /**
     * Returns the label associated with the current node, or null
     * if there is no label.  This default implementation always
     * returns {@code null}; other methods of this class therefore
     * null-check the result before using it.
     *
     * @return The label of the node, or {@code null}
     */
    @Override
    public Label label() {
        return null;
    }

    /**
     * Sets the label associated with the current node, if there is one.
     * This default implementation silently ignores the label: it stores
     * nothing and throws nothing.
     *
     * @param label The label (ignored by this default implementation)
     */
    @Override
    public void setLabel(Label label) {
        // deliberately empty: the default tree type keeps no label
    }

    /**
     * Returns the score associated with the current node, or NaN
     * if there is no score.  This default implementation always returns
     * {@code Double.NaN}.
     *
     * @return The score, or {@code Double.NaN} when the tree type keeps no score
     */
    @Override
    public double score() {
        return Double.NaN;
    }

    /**
     * Sets the score associated with the current node.  This base
     * implementation does not store scores and always throws; tree types
     * that implement scoring must override it.
     *
     * @param score The score
     * @throws UnsupportedOperationException always, in this base implementation
     */
    public void setScore(double score) {
        final String message = "You must use a tree type that implements scoring in order call setScore()";
        throw new UnsupportedOperationException(message);
    }

    /**
     * Returns the leftmost child of this node.
     *
     * @return The first child, or {@code null} if this node has no children
     */
    public Tree firstChild() {
        final Tree[] daughters = children();
        return (daughters.length > 0) ? daughters[0] : null;
    }

    /**
     * Returns the rightmost child of this node.
     *
     * @return The last child, or {@code null} if this node has no children
     */
    public Tree lastChild() {
        final Tree[] daughters = children();
        final int count = daughters.length;
        return (count > 0) ? daughters[count - 1] : null;
    }

    /**
     * Returns the topmost node of the (possibly trivial) unary chain this
     * node belongs to.  Starting from this node, the method climbs while the
     * parent, looked up via {@code parent(root)}, has at most one child.  It
     * stops at the node whose parent has more than one child, or which has
     * no parent under {@code root}.
     *
     * @param root The root of the tree that contains this subtree
     * @return The uppermost node of the unary chain, or this node itself if
     *         it is not below a unary parent
     */
    public Tree upperMostUnary(Tree root) {
        final Tree mother = parent(root);
        final boolean chainContinues = (mother != null) && (mother.numChildren() <= 1);
        return chainContinues ? mother.upperMostUnary(root) : this;
    }

    /**
     * Assigns a {@code SpanAnnotation} to every node of this tree, numbering
     * leaves from zero.  The work is delegated to
     * {@code constituentsNodes(int)}, which throws
     * {@code UnsupportedOperationException} when a node's label is not a
     * {@code CoreLabel}.
     */
    public void setSpans() {
        final int firstLeafIndex = 0;
        constituentsNodes(firstLeafIndex);
    }

    /**
     * Returns the {@code SpanAnnotation} stored on this node's label, if any.
     * Use {@code setSpans()} to assign SpanAnnotations to a tree.
     *
     * @return an IntPair holding the span of this node, or {@code null} when
     *         the label is not a {@code CoreMap} or carries no
     *         {@code SpanAnnotation}
     */
    public IntPair getSpan() {
        final Label nodeLabel = label();
        if (!(nodeLabel instanceof CoreMap)) {
            return null;
        }
        final CoreMap annotations = (CoreMap) nodeLabel;
        if (!annotations.containsKey(CoreAnnotations.SpanAnnotation.class)) {
            return null;
        }
        return annotations.get(CoreAnnotations.SpanAnnotation.class);
    }

    /**
     * Returns the constituents of this parse tree, built with a
     * {@code SimpleConstituentFactory}.  Constituents are computed at the
     * word level (one position per preterminal), not per character.
     *
     * @return a Set of the constituents of this tree
     */
    public Set<Constituent> constituents() {
        final ConstituentFactory factory = new SimpleConstituentFactory();
        return constituents(factory);
    }

    /**
     * Returns the constituents of this parse tree at the word level, using
     * the supplied factory to create them.  Preterminal nodes advance the
     * position but are not themselves added to the set.
     *
     * @param cf ConstituentFactory used to build the Constituent objects
     * @return a Set of the constituents of this tree
     */
    public Set<Constituent> constituents(ConstituentFactory cf) {
        final boolean charLevel = false;
        return constituents(cf, charLevel);
    }

    /**
     * Returns the word-level constituents of this parse tree, limited to
     * nodes at or above a given depth.  No node filter is applied.
     *
     * @param cf ConstituentFactory used to build the Constituent objects
     * @param maxDepth The maximum depth at which to add constituents,
     *                 where 0 is the root level.  Negative maxDepth
     *                 indicates no maximum.
     * @return a newly created Set holding the constituents
     */
    public Set<Constituent> constituents(ConstituentFactory cf, int maxDepth) {
        final Set<Constituent> found = Generics.newHashSet();
        final int startPosition = 0;
        final int rootDepth = 0;
        constituents(found, startPosition, cf, false, null, maxDepth, rootDepth);
        return found;
    }

    /**
     * Returns the constituents of this parse tree with no depth limit and
     * no node filter.
     *
     * @param cf ConstituentFactory used to build the Constituent objects
     * @param charLevel If true, positions advance by the character length of
     *                  each word, so bracketings are computed irrespective
     *                  of whitespace boundaries; otherwise each word counts
     *                  as one position.
     * @return a newly created Set holding the constituents
     */
    public Set<Constituent> constituents(ConstituentFactory cf, boolean charLevel) {
        final Set<Constituent> found = Generics.newHashSet();
        final int unlimitedDepth = -1;
        constituents(found, 0, cf, charLevel, null, unlimitedDepth, 0);
        return found;
    }

    /**
     * Returns the constituents of this parse tree with no depth limit,
     * keeping only nodes accepted by the given filter.
     *
     * @param cf ConstituentFactory used to build the Constituent objects
     * @param charLevel If true, positions advance by the character length of
     *                  each word; otherwise each word counts as one position.
     * @param filter Decides whether a node is added as a constituent;
     *               {@code null} accepts every node
     * @return a newly created Set holding the constituents
     */
    public Set<Constituent> constituents(ConstituentFactory cf, boolean charLevel, Predicate<Tree> filter) {
        final Set<Constituent> found = Generics.newHashSet();
        final int unlimitedDepth = -1;
        constituents(found, 0, cf, charLevel, filter, unlimitedDepth, 0);
        return found;
    }

    /**
     * Records the span of every node in this subtree as an {@code IntPair}
     * under {@code SpanAnnotation} in the node's {@code CoreLabel}.  A leaf
     * starting at {@code left} gets the span {@code (left, left)}; an
     * internal node gets {@code (left, right - 1)} where {@code right} is
     * the position reached after all of its children have been numbered.
     * Children are numbered before their parent.
     *
     * @param left The left position to begin labeling from
     * @return The index of the right frontier of the constituent (one past
     *         the last leaf position covered)
     * @throws UnsupportedOperationException if a node's label is not a
     *         {@code CoreLabel}
     */
    private int constituentsNodes(int left) {
        int rightFrontier;
        if (isLeaf()) {
            // a leaf occupies exactly one position
            rightFrontier = left + 1;
        } else {
            // thread the running position through the daughters, left to right
            rightFrontier = left;
            for (Tree daughter : children()) {
                rightFrontier = daughter.constituentsNodes(rightFrontier);
            }
        }

        final Label nodeLabel = label();
        if (!(nodeLabel instanceof CoreLabel)) {
            throw new UnsupportedOperationException("Can only set spans on trees which use CoreLabel");
        }
        final IntPair span = new IntPair(left, rightFrontier - 1);
        ((CoreLabel) nodeLabel).set(CoreAnnotations.SpanAnnotation.class, span);
        return rightFrontier;
    }

    /**
     * Recursive worker that adds the constituents of this subtree to a set.
     * Numbering starts at {@code left}; the position just past this subtree
     * is returned so that the caller can thread it on to the next sibling.
     * A preterminal only advances the position (by one, or by the length of
     * its word when {@code charLevel} is set) and adds nothing.  Any other
     * node first recurses into its children and then, if it passes the
     * filter and depth test, adds the constituent
     * {@code (left, position - 1)} with this node's label and score.
     *
     * @param constituentsSet set of constituents to add results of bracketing
     *                        this tree to
     * @param left            left position to begin labeling the bracketings with
     * @param cf              ConstituentFactory used to build the Constituent objects
     * @param charLevel       If true, compute constituents without respect to
     *                        whitespace. Otherwise, preserve whitespace boundaries.
     * @param filter          A filter deciding whether to add a tree as a
     *                        constituent; {@code null} accepts every tree.
     * @param maxDepth        The maximum depth at which to allow constituents.
     *                        Negative means all depths are allowed.
     * @param depth           The current depth
     * @return Index of right frontier of Constituent
     */
    private int constituents(Set<Constituent> constituentsSet, int left, ConstituentFactory cf, boolean charLevel,
            Predicate<Tree> filter, int maxDepth, int depth) {

        if (isPreTerminal()) {
            final int width = charLevel ? firstChild().value().length() : 1;
            return left + width;
        }

        int rightFrontier = left;
        final int childDepth = depth + 1;
        for (Tree daughter : children()) {
            rightFrontier = daughter.constituents(constituentsSet, rightFrontier, cf, charLevel, filter, maxDepth,
                    childDepth);
        }

        final boolean accepted = (filter == null) || filter.test(this);
        final boolean shallowEnough = (maxDepth < 0) || (depth <= maxDepth);
        if (accepted && shallowEnough) {
            // the span of this node is only known once all daughters have been visited
            constituentsSet.add(cf.newConstituent(left, rightFrontier - 1, label(), score()));
        }
        return rightFrontier;
    }

    /**
     * Builds a new one-level tree mirroring this node and its direct
     * children.  New nodes are created through {@code treeFactory()} for the
     * mother and each daughter; they reuse the original {@code Label}
     * objects (labels are not copied), and every new daughter is given an
     * empty child list.
     *
     * @return A local tree
     */
    public Tree localTree() {
        final Tree[] daughters = children();
        final Tree[] strippedDaughters = new Tree[daughters.length];
        final TreeFactory factory = treeFactory();
        int slot = 0;
        for (Tree daughter : daughters) {
            strippedDaughters[slot] = factory.newTreeNode(daughter.label(), Arrays.asList(EMPTY_TREE_ARRAY));
            slot++;
        }
        return factory.newTreeNode(label(), Arrays.asList(strippedDaughters));
    }

    /**
     * Returns the set of one-level {@code Tree}s that are the local trees of
     * this tree.  Every phrasal node encountered when iterating over this
     * tree contributes the result of its {@link #localTree()}.
     *
     * @return A set of local trees
     */
    public Set<Tree> localTrees() {
        final Set<Tree> locals = Generics.newHashSet();
        for (Tree node : this) {
            if (!node.isPhrasal()) {
                continue;
            }
            locals.add(node.localTree());
        }
        return locals;
    }

    /**
     * Initial capacity of the {@code StringBuilder} used by
     * {@code toString()}.  Most instances of {@code Tree} will take a lot
     * more than the default builder capacity of 16 characters to print,
     * so we enlarge the default.
     */
    private static final int initialPrintStringBuilderSize = 500;

    /**
     * Appends the bracketed form of this tree to a {@code StringBuilder},
     * printing each label as its {@code value()}, or as the empty string
     * when that value is {@code null}.
     *
     * @param sb The {@code StringBuilder} to which the tree will be appended
     * @return The {@code StringBuilder} passed in, with the tree appended
     */
    public StringBuilder toStringBuilder(StringBuilder sb) {
        final Function<Label, String> valueOrEmpty =
                nodeLabel -> (nodeLabel.value() != null) ? nodeLabel.value() : "";
        return toStringBuilder(sb, valueOrEmpty);
    }

    /**
     * Appends the bracketed form of this tree to a {@code StringBuilder}.
     * A leaf is written as its formatted label alone.  Any other node is
     * written as an opening parenthesis, its formatted label, each child
     * preceded by a single space, and a closing parenthesis.  A node whose
     * {@code label()} is {@code null} contributes no label text.
     *
     * @param sb The {@code StringBuilder} to which the tree will be appended
     * @param labelFormatter Formatting routine for how to print a Label
     * @return The {@code StringBuilder} passed in, with the tree appended
     */
    public StringBuilder toStringBuilder(StringBuilder sb, Function<Label, String> labelFormatter) {
        final boolean leaf = isLeaf();
        if (!leaf) {
            sb.append('(');
        }
        if (label() != null) {
            sb.append(labelFormatter.apply(label()));
        }
        if (leaf) {
            return sb;
        }
        final Tree[] daughters = children();
        if (daughters != null) {
            for (Tree daughter : daughters) {
                sb.append(' ');
                daughter.toStringBuilder(sb, labelFormatter);
            }
        }
        return sb.append(')');
    }

    /**
     * Converts the parse tree to a one-line bracketed string in Penn
     * Treebank format.
     *
     * Implementation note: a single pre-sized {@code StringBuilder} is
     * threaded through the whole recursive printing.
     *
     * @return the tree as a bracketed list on one line
     */
    @Override
    public String toString() {
        final StringBuilder buffer = new StringBuilder(Tree.initialPrintStringBuilderSize);
        toStringBuilder(buffer);
        return buffer.toString();
    }

    /** Width, in spaces, of the per-level pad passed to {@code makeIndentString} by the indented printers. */
    private static final int indentIncr = 2;

    /**
     * Builds a string consisting of {@code indent} space characters.
     * The loop previously ran up to the constant {@code indentIncr} and
     * ignored the argument, which gave the right answer only because every
     * caller happens to pass {@code indentIncr}.
     *
     * @param indent The number of spaces wanted; zero or a negative number
     *               yields the empty string
     * @return A string of {@code indent} spaces
     */
    private static String makeIndentString(int indent) {
        if (indent <= 0) {
            return "";
        }
        char[] spaces = new char[indent];
        Arrays.fill(spaces, ' ');
        return new String(spaces);
    }

    /**
     * Prints the local tree structure of this node (its label and the labels
     * of its direct children) to {@code System.out}, using an auto-flushing
     * {@code PrintWriter}.
     */
    public void printLocalTree() {
        final PrintWriter stdout = new PrintWriter(System.out, true);
        printLocalTree(stdout);
    }

    /**
     * Prints only the local tree structure on one line; it does not recurse
     * below the direct children.  The output is this node's label followed
     * by each child's label in parentheses, all wrapped in an outer pair of
     * parentheses.
     *
     * @param pw The PrintWriter to print to
     */
    public void printLocalTree(PrintWriter pw) {
        pw.print('(');
        pw.print(label());
        pw.print(' ');
        final Tree[] daughters = children();
        for (int i = 0; i < daughters.length; i++) {
            pw.print("(" + daughters[i].label() + ") ");
        }
        pw.println(")");
    }

    /**
     * Prints the tree to {@code System.out} in indented list notation, one
     * node label per line.  Scores are not printed by this variant.
     */
    public void indentedListPrint() {
        final PrintWriter stdout = new PrintWriter(System.out, true);
        final boolean printScores = false;
        indentedListPrint(stdout, printScores);
    }

    /**
     * Prints the tree in indented list notation, one node per line, with
     * each node's label optionally followed by its score.  The root is
     * printed with no indentation and each deeper level is indented by
     * {@code indentIncr} further spaces.
     *
     * @param pw The PrintWriter to print the tree to
     * @param printScores Whether to print the scores (log probs) of tree nodes
     */
    public void indentedListPrint(PrintWriter pw, boolean printScores) {
        final String rootIndent = "";
        final String levelPad = makeIndentString(indentIncr);
        indentedListPrint(rootIndent, levelPad, pw, printScores);
    }

    /**
     * Recursive worker for indented list printing.  Prints one line for
     * this node (the indent, then the label if it is non-null, then two
     * spaces and the score if requested) and then prints each child with
     * the indent extended by {@code pad}.
     * String parameters are used rather than integer levels for efficiency.
     *
     * @param indent The base {@code String} (normally just spaces)
     *               to print before each line of tree
     * @param pad    The additional {@code String} (normally just more
     *               spaces) to add when going to a deeper level of {@code Tree}.
     * @param pw     The PrintWriter to print the tree to
     * @param printScores Whether to print the scores (log probs) of tree nodes
     */
    private void indentedListPrint(String indent, String pad, PrintWriter pw, boolean printScores) {
        final StringBuilder line = new StringBuilder(indent);
        final Label nodeLabel = label();
        if (nodeLabel != null) {
            line.append(nodeLabel);
        }
        if (printScores) {
            line.append("  ").append(score());
        }
        pw.println(line);

        final String deeperIndent = indent + pad;
        final Tree[] daughters = children();
        for (int i = 0; i < daughters.length; i++) {
            daughters[i].indentedListPrint(deeperIndent, pad, pw, printScores);
        }
    }

    /**
     * Prints the tree to {@code System.out} in indented XML notation.
     * Scores are not printed by this variant.
     */
    public void indentedXMLPrint() {
        final PrintWriter stdout = new PrintWriter(System.out, true);
        final boolean printScores = false;
        indentedXMLPrint(stdout, printScores);
    }

    /**
     * Prints the tree in indented XML notation, optionally including each
     * node's score.  The root element is printed with no indentation and
     * each deeper level is indented by {@code indentIncr} further spaces.
     *
     * @param pw The PrintWriter to print the tree to
     * @param printScores Whether to print the scores (log probs) of tree nodes
     */
    public void indentedXMLPrint(PrintWriter pw, boolean printScores) {
        final String rootIndent = "";
        final String levelPad = makeIndentString(indentIncr);
        indentedXMLPrint(rootIndent, levelPad, pw, printScores);
    }

    /**
     * Recursive worker for indented XML printing.  A node with children is
     * written as a {@code <node ...>} element that encloses its children
     * and is closed by {@code </node>}; a node without children is written
     * as a self-closing {@code <leaf .../>} element.  When the node has a
     * label, the XML-escaped label text is written as the {@code value}
     * attribute and, if requested, the score as the {@code score}
     * attribute.  A node without a label gets neither attribute.
     * String parameters are used rather than integer levels for efficiency.
     *
     * <p>The {@code score} attribute value is written in double quotes.
     * It was previously emitted bare ({@code score=-1.5}), which is not
     * well-formed XML.
     *
     * @param indent The base {@code String} (normally just spaces)
     *               to print before each line of tree
     * @param pad    The additional {@code String} (normally just more
     *               spaces) to add when going to a deeper level of {@code Tree}.
     * @param pw     The PrintWriter to print the tree to
     * @param printScores Whether to print the scores (log probs) of tree nodes
     */
    private void indentedXMLPrint(String indent, String pad, PrintWriter pw, boolean printScores) {
        StringBuilder sb = new StringBuilder(indent);
        Tree[] children = children();
        boolean hasChildren = children.length > 0;
        Label label = label();

        sb.append('<').append(hasChildren ? "node" : "leaf");
        if (label != null) {
            sb.append(" value=\"");
            sb.append(XMLUtils.escapeXML(SentenceUtils.wordToString(label, true)));
            sb.append('"');
            if (printScores) {
                // attribute values must be quoted for the output to be well-formed XML
                sb.append(" score=\"").append(score()).append('"');
            }
        }
        // elements with children stay open; childless ones are self-closing
        sb.append(hasChildren ? ">" : "/>");
        pw.println(sb);

        if (hasChildren) {
            String newIndent = indent + pad;
            for (Tree child : children) {
                child.indentedXMLPrint(newIndent, pad, pw, printScores);
            }
            pw.println(indent + "</node>");
        }
    }

    /**
     * Displays a sequence of sibling trees in Penn Treebank layout, telling
     * each one whether it is the first sibling and whether the sibling to
     * its left counts as a preterminal.  The first sibling is treated as if
     * a preterminal preceded it.  A sibling whose value starts with "CC"
     * never counts as a preterminal for the sibling that follows it.
     *
     * @param trChildren      The sibling trees to display, in order
     * @param indent          The indentation level for these siblings
     * @param parentLabelNull Whether the parent's label (or its value) is null
     * @param labelFormatter  Formatting routine for how to print a Label
     * @param pw              The PrintWriter to print to
     */
    private static void displayChildren(Tree[] trChildren, int indent, boolean parentLabelNull,
            Function<Label, String> labelFormatter, PrintWriter pw) {
        boolean previousWasPreTerm = true; // counts as true at beginning
        for (int i = 0; i < trChildren.length; i++) {
            final Tree sibling = trChildren[i];
            final boolean isFirst = (i == 0);
            sibling.display(indent, parentLabelNull, isFirst, previousWasPreTerm, false, labelFormatter, pw);

            final boolean preTerm = sibling.isPreTerminal();
            // CC is a special case for English, but leave it in so we can exactly match PTB3 tree formatting
            final boolean conjunction = sibling.value() != null && sibling.value().startsWith("CC");
            previousWasPreTerm = preTerm && !conjunction;
        }
    }

    /**
     *  Returns this node's {@code value()} as a String, substituting the
     *  empty string when the value is {@code null}.
     *
     *  @return The value of the tree node, never {@code null}
     */
    public String nodeString() {
        final String nodeValue = value();
        return (nodeValue != null) ? nodeValue : "";
    }

    /**
     * Displays a node, implementing Penn Treebank style layout.  The node
     * either continues on the current line (preceded by one space) or
     * starts a new line indented by two spaces per level.  Leaves and
     * preterminals are printed in bracketed one-line form; any other node
     * prints an opening parenthesis, its label, its children, and a closing
     * parenthesis.
     *
     * <p>This method tolerates a {@code null} label and a label whose value
     * is {@code null}.  The "CC" test previously dereferenced
     * {@code label().value()} unguarded, and the formatter was applied to a
     * {@code null} label.  A node with no label now prints no label text,
     * matching {@code toStringBuilder}.
     *
     * @param indent                 The indentation level of this node
     * @param parentLabelNull        Whether the parent's label (or its value) is null
     * @param firstSibling           Whether this node is the first child of its parent
     * @param leftSiblingPreTerminal Whether the sibling to the left counts as a preterminal
     * @param topLevel               Whether this is the outermost node being printed
     *                               (suppresses the leading line break)
     * @param labelFormatter         Formatting routine for how to print a Label
     * @param pw                     The PrintWriter to print to
     */
    private void display(int indent, boolean parentLabelNull, boolean firstSibling, boolean leftSiblingPreTerminal,
            boolean topLevel, Function<Label, String> labelFormatter, PrintWriter pw) {
        Label nodeLabel = label();
        String nodeValue = (nodeLabel == null) ? null : nodeLabel.value();
        // conjunctions (tag CC) are never collapsed onto the preceding line
        boolean isConjunction = nodeValue != null && nodeValue.startsWith("CC");

        // the condition for staying on the same line in Penn Treebank
        boolean suppressIndent = (parentLabelNull && firstSibling) || (firstSibling && isPreTerminal())
                || (leftSiblingPreTerminal && isPreTerminal() && !isConjunction);
        if (suppressIndent) {
            pw.print(" ");
        } else {
            if (!topLevel) {
                pw.println();
            }
            for (int i = 0; i < indent; i++) {
                pw.print("  ");
            }
        }
        if (isLeaf() || isPreTerminal()) {
            String terminalString = toStringBuilder(new StringBuilder(), labelFormatter).toString();
            pw.print(terminalString);
            pw.flush();
            return;
        }
        pw.print("(");
        if (nodeLabel != null) {
            pw.print(labelFormatter.apply(nodeLabel));
        }
        boolean parentIsNull = nodeValue == null;
        displayChildren(children(), indent + 1, parentIsNull, labelFormatter, pw);
        pw.print(")");
        pw.flush();
    }

    /**
     * Print the tree as done in Penn Treebank merged files.
     * The formatting should be exactly the same, but we don't print the
     * trailing whitespace found in Penn Treebank trees.
     * The basic deviation from a bracketed indented tree is to in general
     * collapse the printing of adjacent preterminals onto one line of
     * tags and words.  Additional complexities are that conjunctions
     * (tag CC) are not collapsed in this way, and that the unlabeled
     * outer brackets are collapsed onto the same line as the next
     * bracket down.
     *
     * @param pw The tree is printed to this {@code PrintWriter}
     */
    public void pennPrint(PrintWriter pw) {
        // Default formatter: print the label's value, or nothing when there is none.
        // The label itself is null-checked because display() hands label() to the
        // formatter and explicitly allows that label to be null.
        pennPrint(pw, label -> (label == null || label.value() == null) ? "" : label.value());
    }

    /**
     * Prints the tree in Penn Treebank layout, rendering every node label with
     * the supplied formatter. A line break is written after the tree and the
     * writer is flushed.
     *
     * @param pw The tree is printed to this {@code PrintWriter}
     * @param labelFormatter Converts a node label to the text that is printed
     */
    public void pennPrint(PrintWriter pw, Function<Label, String> labelFormatter) {
        final int rootIndent = 0;
        final boolean topLevel = true;
        // the root has no parent and no siblings, so those flags are all false
        display(rootIndent, false, false, false, topLevel, labelFormatter, pw);
        pw.println();
        pw.flush();
    }

    /**
     * Print the tree as done in Penn Treebank merged files.
     * The formatting should be exactly the same, but we don't print the
     * trailing whitespace found in Penn Treebank trees.
     * The basic deviation from a bracketed indented tree is to in general
     * collapse the printing of adjacent preterminals onto one line of
     * tags and words.  Additional complexities are that conjunctions
     * (tag CC) are not collapsed in this way, and that the unlabeled
     * outer brackets are collapsed onto the same line as the next
     * bracket down.
     *
     * @param ps The tree is printed to this {@code PrintStream}
     */
    public void pennPrint(PrintStream ps) {
        // wrap the stream in an auto-flushing writer and delegate
        final PrintWriter writer = new PrintWriter(new OutputStreamWriter(ps), true);
        pennPrint(writer);
    }

    /**
     * Prints the tree in Penn Treebank layout to a {@code PrintStream},
     * rendering every node label with the supplied formatter.
     *
     * @param ps The tree is printed to this {@code PrintStream}
     * @param labelFormatter Converts a node label to the text that is printed
     */
    public void pennPrint(PrintStream ps, Function<Label, String> labelFormatter) {
        // wrap the stream in an auto-flushing writer and delegate
        final PrintWriter writer = new PrintWriter(new OutputStreamWriter(ps), true);
        pennPrint(writer, labelFormatter);
    }

    /**
     * Calls {@code pennPrint()} and saves output to a String
     *
     * @return The indented S-expression representation of a Tree
     */
    public String pennString() {
        final StringWriter buffer = new StringWriter();
        final PrintWriter out = new PrintWriter(buffer);
        // pennPrint flushes the writer, so the buffer is complete afterwards
        pennPrint(out);
        return buffer.toString();
    }

    /**
     * Return String of leaves spanned by this tree assuming they are CoreLabel's
     * Throws an IllegalArgumentException if the leaves are not CoreLabels that contain
     * text info as in the typical use case of a Tree generated by a pipeline
     *
     * @return The text of the span of this Tree
     */
    public String spanString() {
        // A single traversal is enough; the list is never empty because a node
        // without children is itself a leaf.
        final List<Tree> leaves = this.getLeaves();
        final int lastIdx = leaves.size() - 1;
        final StringBuilder text = new StringBuilder();
        for (int i = 0; i <= lastIdx; i++) {
            // Validate every leaf, not just the first, so that a malformed leaf
            // yields the documented IllegalArgumentException rather than a
            // ClassCastException or the literal text "null" in the result.
            final Label leafLabel = leaves.get(i).label();
            if (!(leafLabel instanceof CoreLabel)) {
                throw new IllegalArgumentException("Expected leaves to be CoreLabels");
            }
            final CoreLabel token = (CoreLabel) leafLabel;
            if (token.word() == null) {
                throw new IllegalArgumentException("Expected CoreLabel's to have text");
            }
            // after() is appended for every token except the last one; it is still
            // required on the first leaf, as before, even when that is the only leaf
            final boolean afterIsAppended = i < lastIdx;
            if ((i == 0 || afterIsAppended) && token.after() == null) {
                throw new IllegalArgumentException("Expected CoreLabel's to have after() text");
            }
            // reconstruct original String from CoreLabel fields
            text.append(token.word());
            if (afterIsAppended) {
                text.append(token.after());
            }
        }
        return text.toString();
    }

    /**
     * Print the tree as done in Penn Treebank merged files.
     * The formatting should be exactly the same, but we don't print the
     * trailing whitespace found in Penn Treebank trees.
     * The tree is printed to {@code System.out}. The basic deviation
     * from a bracketed indented tree is to in general
     * collapse the printing of adjacent preterminals onto one line of
     * tags and words.  Additional complexities are that conjunctions
     * (tag CC) are not collapsed in this way, and that the unlabeled
     * outer brackets are collapsed onto the same line as the next
     * bracket down.
     */
    public void pennPrint() {
        final PrintStream stdout = System.out;
        pennPrint(stdout);
    }

    /**
     * Finds the depth of the tree.  The depth is defined as the length
     * of the longest path from this node to a leaf node.  Leaf nodes
     * have depth zero.  POS tags have depth 1. Phrasal nodes have
     * depth &gt;= 2.
     *
     * @return the depth
     */
    public int depth() {
        if (isLeaf()) {
            return 0;
        }
        // one more than the deepest child
        int deepestKid = 0;
        for (Tree kid : children()) {
            deepestKid = Math.max(deepestKid, kid.depth());
        }
        return 1 + deepestKid;
    }

    /**
     * Finds the distance from this node to the specified node.
     * return -1 if this is not an ancestor of node.
     *
     * @param node A subtree contained in this tree
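     * @return the number of edges from this node down to {@code node} (0 if
     *         {@code node} is this tree), or -1 if this is not an ancestor of it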
     */
    public int depth(Tree node) {
        Tree ancestor = node.parent(this);
        if (node == this) {
            return 0;
        }
        if (ancestor == null) {
            // node has no parent within this tree
            return -1;
        }
        // climb from node's parent up to this tree, counting the steps
        int distance = 1;
        for (; ancestor != this; ancestor = ancestor.parent(this)) {
            distance++;
        }
        return distance;
    }

    /**
     * Returns the tree leaf that is the head of the tree.
     *
     * @param hf The head-finding algorithm to use
     * @param parent  The parent of this tree
     * @return The head tree leaf if any, else {@code null}
     */
    public Tree headTerminal(HeadFinder hf, Tree parent) {
        if (isLeaf()) {
            return this;
        }
        final Tree headChild = hf.determineHead(this, parent);
        if (headChild == null) {
            log.info("Head is null: " + this);
            return null;
        }
        // NOTE(review): the caller-supplied parent is forwarded unchanged to the
        // head child rather than passing this node -- confirm that is intended.
        return headChild.headTerminal(hf, parent);
    }

    /**
     * Returns the tree leaf that is the head of the tree.
     *
     * @param hf The headfinding algorithm to use
     * @return The head tree leaf if any, else {@code null}
     */
    public Tree headTerminal(HeadFinder hf) {
        // no parent is supplied for the starting node
        final Tree noParent = null;
        return headTerminal(hf, noParent);
    }

    /**
     * Returns the preterminal tree that is the head of the tree.
     * See {@link #isPreTerminal()} for
     * the definition of a preterminal node. Beware that some tree nodes may
     * have no preterminal head.
     *
     * @param hf The headfinding algorithm to use
     * @return The head preterminal tree, if any, else {@code null}
     * @throws IllegalArgumentException if called on a leaf node
     */
    public Tree headPreTerminal(HeadFinder hf) {
        if (isPreTerminal()) {
            return this;
        }
        if (isLeaf()) {
            throw new IllegalArgumentException("Called headPreTerminal on a leaf: " + this);
        }
        final Tree headChild = hf.determineHead(this);
        if (headChild == null) {
            log.info("Head preterminal is null: " + this);
            return null;
        }
        // keep descending along the head path until a preterminal is reached
        return headChild.headPreTerminal(hf);
    }

    /**
     * Finds the head words of each tree and assigns
     * HeadWordLabelAnnotation on each node pointing to the correct
     * CoreLabel.  This relies on the nodes being CoreLabels, so it
     * throws an IllegalArgumentException if this is ever not true.
     */
    public void percolateHeadAnnotations(HeadFinder hf) {
        final Label rawLabel = label();
        if (!(rawLabel instanceof CoreLabel)) {
            throw new IllegalArgumentException("Expected CoreLabels in the trees");
        }
        final CoreLabel nodeLabel = (CoreLabel) rawLabel;

        // leaves receive no head annotations
        if (isLeaf()) {
            return;
        }

        // a preterminal is headed by its word; the tag annotation points at the node's own label
        if (isPreTerminal()) {
            final CoreLabel wordLabel = (CoreLabel) children()[0].label();
            nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
            nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, nodeLabel);
            return;
        }

        // annotate the subtrees first so the head child's annotations can be copied upwards
        for (Tree kid : children()) {
            kid.percolateHeadAnnotations(hf);
        }

        final Tree head = hf.determineHead(this);
        if (head == null) {
            throw new NullPointerException("HeadFinder " + hf + " returned null for " + this);
        }

        if (head.isLeaf()) {
            nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.label());
            nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.parent(this).label());
            return;
        }

        if (head.isPreTerminal()) {
            nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class,
                    (CoreLabel) head.children()[0].label());
            nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.label());
            return;
        }

        // phrasal head: copy the annotations already stored on the head child's label
        if (!(head.label() instanceof CoreLabel)) {
            throw new AssertionError("Horrible bug");
        }
        final CoreLabel headLabel = (CoreLabel) head.label();
        nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class,
                headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
        nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class,
                headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));
    }

    /**
     * Finds the heads of the tree.  This code assumes that the label
     * does store and return sensible values for the category, word, and tag.
     * It will be a no-op otherwise.  The tree is modified.  The routine
     * assumes the Tree has word leaves and tag preterminals, and copies
     * their category to word and tag respectively, if they have a null
     * value.
     *
     * @param hf The headfinding algorithm to use
     */
    public void percolateHeads(HeadFinder hf) {
        final Label nodeLabel = label();

        if (isLeaf()) {
            // Sanity check: word() is usually set by the TreeReader.
            if (nodeLabel instanceof HasWord) {
                final HasWord wordHolder = (HasWord) nodeLabel;
                if (wordHolder.word() == null) {
                    wordHolder.setWord(nodeLabel.value());
                }
            }
            return;
        }

        // children first, so the head child already carries its own head information
        for (Tree kid : children()) {
            kid.percolateHeads(hf);
        }

        final Tree head = hf.determineHead(this);
        if (head == null) {
            log.info("Head is null: " + this);
            return;
        }
        final Label headLabel = head.label();

        // Head tag: taken from the head's label; if the head is a leaf without
        // a tag, this node's own value is used instead.
        String headTag = null;
        if (headLabel instanceof HasTag) {
            headTag = ((HasTag) headLabel).tag();
        }
        if (headTag == null && head.isLeaf()) {
            headTag = nodeLabel.value();
        }

        // Head word: taken from the head's label; a leaf head falls back to its
        // value, which covers leaf label types that don't support word().
        String headWord = null;
        if (headLabel instanceof HasWord) {
            headWord = ((HasWord) headLabel).word();
        }
        if (headWord == null && head.isLeaf()) {
            headWord = headLabel.value();
        }

        // Head index: -1 when the head label carries no index
        int headIndex = -1;
        if (headLabel instanceof HasIndex) {
            headIndex = ((HasIndex) headLabel).index();
        }

        // store whatever this node's label type is able to hold
        if (nodeLabel instanceof HasWord) {
            ((HasWord) nodeLabel).setWord(headWord);
        }
        if (nodeLabel instanceof HasTag) {
            ((HasTag) nodeLabel).setTag(headTag);
        }
        if (nodeLabel instanceof HasIndex && headIndex >= 0) {
            ((HasIndex) nodeLabel).setIndex(headIndex);
        }
    }

    /**
     * Return a Set of TaggedWord-TaggedWord dependencies, represented as
     * Dependency objects, for the Tree.  This will only give
     * useful results if the internal tree node labels support HasWord and
     * HasTag, and head percolation has already been done (see
     * percolateHeads()).
     *
     * @return Set of dependencies (each a Dependency)
     */
    public Set<Dependency<Label, Label, Object>> dependencies() {
        // no filtering: use the accept filter
        final Predicate<Dependency<Label, Label, Object>> acceptAll = Filters.acceptFilter();
        return dependencies(acceptAll);
    }

    /**
     * Return the dependencies of the Tree that pass the given filter, using
     * concrete (indexed) dependencies and copied labels without POS tags.
     *
     * @param f Dependencies are excluded for which the Dependency is not
     *          accepted by the Filter
     * @return Set of dependencies (each a Dependency)
     */
    public Set<Dependency<Label, Label, Object>> dependencies(Predicate<Dependency<Label, Label, Object>> f) {
        final boolean isConcrete = true;
        final boolean copyLabel = true;
        final boolean copyPosTag = false;
        return dependencies(f, isConcrete, copyLabel, copyPosTag);
    }

    /**
     * Convert a constituency label to a dependency label. Options are provided for selecting annotations
     * to copy.
     *
     * @param oldLabel
     * @param copyLabel
     * @param copyIndex
     * @param copyPosTag
     */
    private static Label makeDependencyLabel(Label oldLabel, boolean copyLabel, boolean copyIndex,
            boolean copyPosTag) {
        // without copying, the constituency label itself is reused
        if (!copyLabel) {
            return oldLabel;
        }

        // the word form comes from word() when available, otherwise from value()
        final String wordForm;
        if (oldLabel instanceof HasWord) {
            wordForm = ((HasWord) oldLabel).word();
        } else {
            wordForm = oldLabel.value();
        }

        final Label copy = oldLabel.labelFactory().newLabel(wordForm);
        if (copy instanceof HasWord) {
            ((HasWord) copy).setWord(wordForm);
        }

        // tag and index are carried over only on request and only if both labels support them
        final boolean transferTag = copyPosTag && copy instanceof HasTag && oldLabel instanceof HasTag;
        if (transferTag) {
            ((HasTag) copy).setTag(((HasTag) oldLabel).tag());
        }
        final boolean transferIndex = copyIndex && copy instanceof HasIndex && oldLabel instanceof HasIndex;
        if (transferIndex) {
            ((HasIndex) copy).setIndex(((HasIndex) oldLabel).index());
        }

        return copy;
    }

    /**
     * Return a set of TaggedWord-TaggedWord dependencies, represented as
     * Dependency objects, for the Tree.  This will only give
     * useful results if the internal tree node labels support HasWord and
     * head percolation has already been done (see percolateHeads()).
     *
     * @param f Dependencies are excluded for which the Dependency is not
     *          accepted by the Filter
     * @return Set of dependencies (each a Dependency)
     */
    public Set<Dependency<Label, Label, Object>> dependencies(Predicate<Dependency<Label, Label, Object>> f,
            boolean isConcrete, boolean copyLabel, boolean copyPosTag) {
        final Set<Dependency<Label, Label, Object>> result = Generics.newHashSet();
        for (Tree node : this) {
            // Skip leaves and unary re-writes
            if (node.isLeaf() || node.children().length < 2) {
                continue;
            }

            // Create the head label (percolateHeads has already been executed).
            // Note that the label is cast to HasWord without a check.
            final Label headLabel = makeDependencyLabel(node.label(), copyLabel, isConcrete, copyPosTag);
            String headWord = ((HasWord) headLabel).word();
            if (headWord == null) {
                headWord = headLabel.value();
            }
            int headIndex = -1;
            if (isConcrete && (headLabel instanceof HasIndex)) {
                headIndex = ((HasIndex) headLabel).index();
            }

            // every child with a different (or repeated) head is an argument
            boolean seenHead = false;
            for (Tree child : node.children()) {
                final Label depLabel = makeDependencyLabel(child.label(), copyLabel, isConcrete, copyPosTag);
                String depWord = ((HasWord) depLabel).word();
                if (depWord == null) {
                    depWord = depLabel.value();
                }
                int depIndex = -1;
                if (isConcrete && (depLabel instanceof HasIndex)) {
                    depIndex = ((HasIndex) depLabel).index();
                }

                // the first child matching the head's index and word is the head itself
                if (!seenHead && headIndex == depIndex && headWord.equals(depWord)) {
                    seenHead = true;
                    continue;
                }

                final Dependency<Label, Label, Object> dependency;
                if (isConcrete && depIndex != headIndex) {
                    dependency = new UnnamedConcreteDependency(headLabel, depLabel);
                } else {
                    dependency = new UnnamedDependency(headLabel, depLabel);
                }
                if (f.test(dependency)) {
                    result.add(dependency);
                }
            }
        }
        return result;
    }

    /**
     * Return a set of Label-Label dependencies, represented as
     * Dependency objects, for the Tree.  The Labels are the ones of the leaf
     * nodes of the tree, without mucking with them.
     *
     * @param f  Dependencies are excluded for which the Dependency is not
     *           accepted by the Filter
     * @param hf The HeadFinder to use to identify the head of constituents.
     *           The code assumes
     *           that it can use {@code headPreTerminal(hf)} to find a
     *           tag and word to make a CoreLabel.
     * @return Set of dependencies (each a {@code Dependency} between two
     *           {@code CoreLabel}s, which each contain a tag(), word(),
     *           and value(), the last two of which are identical).
     */
    public Set<Dependency<Label, Label, Object>> mapDependencies(Predicate<Dependency<Label, Label, Object>> f,
            HeadFinder hf) {
        if (hf == null) {
            throw new IllegalArgumentException("mapDependencies: need HeadFinder");
        }
        final Set<Dependency<Label, Label, Object>> result = Generics.newHashSet();
        for (Tree node : this) {
            // leaves and unary nodes contribute no dependencies
            if (node.isLeaf() || node.children().length < 2) {
                continue;
            }
            final Tree headLeaf = node.headTerminal(hf);
            if (headLeaf == null) {
                throw new IllegalStateException("mapDependencies: HeadFinder failed!");
            }

            for (Tree child : node.children()) {
                final Tree depLeaf = child.headTerminal(hf);
                if (depLeaf == null) {
                    throw new IllegalStateException("mapDependencies: HeadFinder failed!");
                }
                // the child whose head leaf is the node's own head leaf is not a dependent
                if (depLeaf == headLeaf) {
                    continue;
                }
                final Dependency<Label, Label, Object> candidate =
                        new UnnamedDependency(headLeaf.label(), depLeaf.label());
                if (f.test(candidate)) {
                    result.add(candidate);
                }
            }
        }
        return result;
    }

    /**
     * Return a set of Label-Label dependencies, represented as
     * Dependency objects, for the Tree.  The Labels are the ones of the leaf
     * nodes of the tree, without mucking with them. The head of the sentence is a
     * dependent of a synthetic "root" label.
     *
     * @param f  Dependencies are excluded for which the Dependency is not
     *           accepted by the Filter
     * @param hf The HeadFinder to use to identify the head of constituents.
     *           The code assumes
     *           that it can use {@code headPreTerminal(hf)} to find a
     *           tag and word to make a CoreLabel.
     * @param    rootName Name of the root node.
     * @return   Set of dependencies (each a {@code Dependency} between two
     *           {@code CoreLabel}s, which each contain a tag(), word(),
     *           and value(), the last two of which are identical).
     */
    public Set<Dependency<Label, Label, Object>> mapDependencies(Predicate<Dependency<Label, Label, Object>> f,
            HeadFinder hf, String rootName) {
        final Set<Dependency<Label, Label, Object>> deps = mapDependencies(f, hf);
        // without a root name no synthetic root dependency is added
        if (rootName == null) {
            return deps;
        }

        // headTerminal returns null when the HeadFinder cannot determine a head;
        // report that the same way the two-argument overload does instead of
        // failing with a bare NullPointerException on the label() call.
        final Tree sentenceHead = headTerminal(hf);
        if (sentenceHead == null) {
            throw new IllegalStateException("mapDependencies: HeadFinder failed!");
        }
        final Label hl = sentenceHead.label();

        // synthetic root token: carries the root name as text and index 0
        final CoreLabel rl = new CoreLabel();
        rl.set(CoreAnnotations.TextAnnotation.class, rootName);
        rl.set(CoreAnnotations.IndexAnnotation.class, 0);
        // the root dependency is added without consulting the filter
        deps.add(new NamedDependency(rl, hl, rootName));
        return deps;
    }

    /**
     * Gets the yield of the tree.  The {@code Label} of all leaf nodes
     * is returned
     * as a list ordered by the natural left to right order of the
     * leaves.  Null values, if any, are inserted into the list like any
     * other value.
     *
     * @return a {@code List} of the data in the tree's leaves.
     */
    public ArrayList<Label> yield() {
        return yield(new ArrayList<>());
    }

    /**
     * Gets the yield of the tree.  The {@code Label} of all leaf nodes
     * is returned
     * as a list ordered by the natural left to right order of the
     * leaves.  Null values, if any, are inserted into the list like any
     * other value.
     * <p><i>Implementation notes:</i> c. 2003: This has been rewritten to thread, so only one List
     * is used. 2007: This method was duplicated to start to give type safety to Sentence.
     * This method will now make a Word for any Leaf which does not itself implement HasWord, and
     * put the Word into the Sentence, so the Sentence elements MUST implement HasWord.
     *
     * @param y The list in which the yield of the tree will be placed.
     *          Normally, this will be empty when the routine is called, but
     *          if not, the new yield is added to the end of the list.
     * @return a {@code List} of the data in the tree's leaves.
     */
    public ArrayList<Label> yield(ArrayList<Label> y) {
        if (isLeaf()) {
            // a leaf contributes its own label
            y.add(label());
            return y;
        }
        // interior node: collect from each child, left to right, into the same list
        for (Tree kid : children()) {
            kid.yield(y);
        }
        return y;
    }

    /**
     * Gets the yield of the tree as {@code Word} objects, one per leaf, in
     * left to right order.
     *
     * @return a new list holding a {@code Word} for every leaf
     */
    public ArrayList<Word> yieldWords() {
        final ArrayList<Word> words = new ArrayList<>();
        return yieldWords(words);
    }

    /**
     * Appends a {@code Word} built from each leaf label to the given list, in
     * left to right order.
     *
     * @param y The list to which the words are appended
     * @return the list {@code y}
     */
    public ArrayList<Word> yieldWords(ArrayList<Word> y) {
        if (isLeaf()) {
            final Word leafWord = new Word(label());
            y.add(leafWord);
            return y;
        }
        for (Tree kid : children()) {
            kid.yieldWords(y);
        }
        return y;
    }

    /**
     * Gets the yield of the tree as a new list of {@code HasWord} objects.
     *
     * @return a new list filled by {@code yieldHasWord(ArrayList)}
     */
    public <X extends HasWord> ArrayList<X> yieldHasWord() {
        final ArrayList<X> words = new ArrayList<>();
        return yieldHasWord(words);
    }

    /**
     * Appends the yield of the tree to the given list such that every element
     * implements {@code HasWord}. A leaf label that implements {@code HasWord}
     * is added as is (a {@code CoreLabel} with a null word first gets its word
     * set to its value); any other leaf label is wrapped in a new {@code Word}.
     *
     * @param y The list to which the yield is appended
     * @return the list {@code y}
     */
    @SuppressWarnings("unchecked")
    public <X extends HasWord> ArrayList<X> yieldHasWord(ArrayList<X> y) {
        if (isLeaf()) {
            Label lab = label();
            // cdm: this is new hacked in stuff in Mar 2007 so we can now have a
            // well-typed version of a Sentence, whose objects MUST implement HasWord
            //
            // wsg (Feb. 2010) - More hacks for trees with CoreLabels in which the type implements
            // HasWord but only the value field is populated. This can happen if legacy code uses
            // LabeledScoredTreeFactory but passes in a StringLabel to e.g. newLeaf().
            if (lab instanceof HasWord) {
                if (lab instanceof CoreLabel) {
                    CoreLabel cl = (CoreLabel) lab;
                    if (cl.word() == null) {
                        cl.setWord(cl.value());
                    }
                    y.add((X) cl);
                } else {
                    y.add((X) lab);
                }
            } else {
                y.add((X) new Word(lab));
            }
        } else {
            for (Tree kid : children()) {
                // Recurse through this method, not yield(List): the latter would add
                // non-HasWord leaf labels unwrapped and overwrite existing words, so
                // only a leaf passed in directly would get the handling above.
                kid.yieldHasWord(y);
            }
        }
        return y;
    }

    /**
     * Gets the yield of the tree.  The {@code Label} of all leaf nodes
     * is returned
     * as a list ordered by the natural left to right order of the
     * leaves.  Null values, if any, are inserted into the list like any
     * other value.  This has been rewritten to thread, so only one List
     * is used.
     *
     * @param y The list in which the yield of the tree will be placed.
     *          Normally, this will be empty when the routine is called, but
     *          if not, the new yield is added to the end of the list.
     * @return a {@code List} of the data in the tree's leaves.
     */
    @SuppressWarnings("unchecked")
    public <T> List<T> yield(List<T> y) {
        if (!isLeaf()) {
            // interior node: collect from each child, left to right, into the same list
            for (Tree kid : children()) {
                kid.yield(y);
            }
            return y;
        }
        final Label leafLabel = label();
        if (leafLabel instanceof HasWord) {
            // side effect: the label's word is (re)set to its value before it is added
            ((HasWord) leafLabel).setWord(leafLabel.value());
        }
        y.add((T) leafLabel);
        return y;
    }

    /**
     * Gets the tagged yield of the tree.
     * The {@code Label} of all leaf nodes is returned
     * as a list ordered by the natural left to right order of the
     * leaves.  Null values, if any, are inserted into the list like any
     * other value.
     *
     * @return a {@code List} of the data in the tree's leaves.
     */
    public ArrayList<TaggedWord> taggedYield() {
        final ArrayList<TaggedWord> tagged = new ArrayList<>();
        return taggedYield(tagged);
    }

    /**
     * Gets the labeled yield of the tree: one {@code LabeledWord} per
     * preterminal, in left to right order.
     *
     * @return a new list filled by {@code labeledYield(List)}
     */
    public List<LabeledWord> labeledYield() {
        final List<LabeledWord> labeled = new ArrayList<>();
        return labeledYield(labeled);
    }

    /**
     * Gets the tagged yield of the tree -- that is, get the preterminals
     * as well as the terminals.  The {@code Label} of all leaf nodes
     * is returned
     * as a list ordered by the natural left to right order of the
     * leaves.  Null values, if any, are inserted into the list like any
     * other value.  This has been rewritten to thread, so only one List
     * is used.
     * <br>
     * <i>Implementation note:</i> when we summon up enough courage, this
     * method will be changed to take and return a {@code List<W extends TaggedWord>}.
     *
     * @param ty The list in which the tagged yield of the tree will be
     *           placed. Normally, this will be empty when the routine is called,
     *           but if not, the new yield is added to the end of the list.
     * @return a {@code List} of the data in the tree's leaves.
     */
    public <X extends List<TaggedWord>> X taggedYield(X ty) {
        if (isPreTerminal()) {
            // pair the word (the first child's label) with this node's label as its tag
            final TaggedWord pair = new TaggedWord(firstChild().label(), label());
            ty.add(pair);
            return ty;
        }
        for (Tree kid : children()) {
            kid.taggedYield(ty);
        }
        return ty;
    }

    /**
     * Appends one {@code LabeledWord} per preterminal to the given list, built
     * from the label of the preterminal's first child and the preterminal's
     * own label.
     *
     * @param ty The list to which the labeled words are appended
     * @return the list {@code ty}
     */
    public List<LabeledWord> labeledYield(List<LabeledWord> ty) {
        if (isPreTerminal()) {
            final LabeledWord pair = new LabeledWord(firstChild().label(), label());
            ty.add(pair);
            return ty;
        }
        for (Tree kid : children()) {
            kid.labeledYield(ty);
        }
        return ty;
    }

    /** Returns a {@code List<CoreLabel>} from the tree.
     *  These are a copy of the complete token representation
     *  that adds the tag as the tag and value.
     *
     *  @return A tagged, labeled yield.
     */
    public List<CoreLabel> taggedLabeledYield() {
        final List<CoreLabel> tokens = new ArrayList<>();
        // token indices start at 0; the returned next index is not needed here
        final int firstIndex = 0;
        taggedLabeledYield(tokens, firstIndex);
        return tokens;
    }

    /**
     * Appends one {@code CoreLabel} per preterminal to {@code ty}. Each is a
     * copy of the word's label whose value and tag are set to the
     * preterminal's value and whose index is the running token counter.
     *
     * @param ty The list to which the tokens are appended
     * @param termIdx The index to assign to the next token
     * @return the index to assign to the token following this subtree
     */
    private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
        if (!isPreTerminal()) {
            // thread the counter through the children, left to right
            int nextIdx = termIdx;
            for (Tree kid : getChildrenAsList()) {
                nextIdx = kid.taggedLabeledYield(ty, nextIdx);
            }
            return nextIdx;
        }

        // usually this will fill in all the usual keys for a token
        final CoreLabel token = new CoreLabel(firstChild().label());
        // but in case this just came from reading a tree that just has a value for words
        if (token.word() == null) {
            token.setWord(firstChild().value());
        }
        String tag = value();
        if (tag == null) {
            tag = "";
        }
        // set value and tag to the tag
        token.setValue(tag);
        token.setTag(tag);
        token.setIndex(termIdx);
        ty.add(token);
        return termIdx + 1;
    }

    /**
     * Gets the preterminal yield (i.e., tags) of the tree.  All data in
     * preterminal nodes is returned as a list ordered by the natural left to
     * right order of the tree.  Null values, if any, are inserted into the
     * list like any other value.  Pre-leaves are nodes of height 1.
     *
     * @return a {@code List} of the data in the tree's pre-leaves.
     */
    public List<Label> preTerminalYield() {
        final List<Label> tags = new ArrayList<>();
        return preTerminalYield(tags);
    }

    /**
     * Gets the preterminal yield (i.e., tags) of the tree.  All data in
     * preleaf nodes is returned as a list ordered by the natural left to
     * right order of the tree.  Null values, if any, are inserted into the
     * list like any other value.  Pre-leaves are nodes of height 1.
     *
     * @param y The list in which the preterminals of the tree will be
     *          placed. Normally, this will be empty when the routine is called,
     *          but if not, the new yield is added to the end of the list.
     * @return a {@code List} of the data in the tree's pre-leaves.
     */
    public List<Label> preTerminalYield(List<Label> y) {
        if (isPreTerminal()) {
            y.add(label());
            return y;
        }
        // not a preterminal: gather from the children into the same list
        for (Tree kid : children()) {
            kid.preTerminalYield(y);
        }
        return y;
    }

    /**
     * Gets the leaves of the tree.  All leaves nodes are returned as a list
     * ordered by the natural left to right order of the tree.  Null values,
     * if any, are inserted into the list like any other value.
     *
     * @return a {@code List} of the leaves.
     */
    public <T extends Tree> List<T> getLeaves() {
        final List<T> leaves = new ArrayList<>();
        return getLeaves(leaves);
    }

    /**
     * Gets the leaves of the tree.
     *
     * @param list The list in which the leaves of the tree will be
     *             placed. Normally, this will be empty when the routine is called,
     *             but if not, the new yield is added to the end of the list.
     * @return a {@code List} of the leaves.
     */
    @SuppressWarnings("unchecked")
    public <T extends Tree> List<T> getLeaves(List<T> list) {
        if (isLeaf()) {
            // unchecked: the caller chooses T, the node is added as is
            final T self = (T) this;
            list.add(self);
            return list;
        }
        for (Tree kid : children()) {
            kid.getLeaves(list);
        }
        return list;
    }

    /**
     * Get the set of all node and leaf {@code Label}s,
     * null or otherwise, contained in the tree.
     *
     * @return the {@code Collection} (actually, Set) of all values
     *         in the tree.
     */
    @Override
    public Collection<Label> labels() {
        final Set<Label> collected = Generics.newHashSet();
        // this node's label first, then everything found below each child
        collected.add(label());
        for (Tree kid : children()) {
            collected.addAll(kid.labels());
        }
        return collected;
    }

    /**
     * Not supported: always throws.
     *
     * @param c ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    public void setLabels(Collection<Label> c) {
        final String message = "Can't set Tree labels";
        throw new UnsupportedOperationException(message);
    }

    /**
     * Returns a flattened version of this tree, built with the factory
     * returned by {@link #treeFactory()}.  Often this is just the tree itself,
     * but for something like a binarized dependency grammar tree the result is
     * flattened back to a dependency grammar tree representation.  Formally, a
     * node is removed when it is neither a terminal nor a preterminal and its
     * {@code label()} {@code equals()} the {@code label()} of its parent; its
     * children are then promoted to be children of the parent (in the same
     * position in the sequence of daughters).
     *
     * @return A flattened version of this tree
     */
    public Tree flatten() {
        TreeFactory factory = treeFactory();
        return flatten(factory);
    }

    /**
     * Returns a flattened version of this tree.  Often this is just the tree
     * itself, but for something like a binarized dependency grammar tree the
     * result is flattened back to a dependency grammar tree representation.
     * Formally, a node is removed when it is neither a terminal nor a
     * preterminal and its {@code label()} {@code equals()} the {@code label()}
     * of its parent; its children are then promoted to be children of the
     * parent (in the same position in the sequence of daughters).
     *
     * Note: leaves and preterminals are returned as they are rather than
     * copied, so the result shares those nodes with the original tree; only
     * the structure above them is rebuilt.
     *
     * @param tf TreeFactory used to create the nodes of the flattened tree
     * @return A flattened version of this tree
     */
    public Tree flatten(TreeFactory tf) {
        if (isLeaf() || isPreTerminal()) {
            return this;
        }
        Tree[] daughters = children();
        List<Tree> flattened = new ArrayList<>(daughters.length);
        for (Tree daughter : daughters) {
            if (daughter.isLeaf() || daughter.isPreTerminal()) {
                // Terminals and preterminals are reused untouched.
                flattened.add(daughter);
                continue;
            }
            Tree flatDaughter = daughter.flatten(tf);
            if (label().equals(flatDaughter.label())) {
                // Same label as this node: splice its children in place of it.
                flattened.addAll(flatDaughter.getChildrenAsList());
            } else {
                flattened.add(flatDaughter);
            }
        }
        return tf.newTreeNode(label(), flattened);
    }

    /**
     * Returns the set of all subtrees of this tree, one rooted at each node.
     * The subtrees are <i>not</i> copies: they share structure with this tree.
     * The tree counts as a subtree of itself.
     *
     * <i>Note:</i> If the set is only needed for iteration, it is more
     * efficient to use the Tree class's own {@code iterator()} method, which
     * visits exactly the same elements (though probably in a different order).
     *
     * @return The {@code Set} of all subtrees in the tree
     */
    public Set<Tree> subTrees() {
        Set<Tree> nodes = Generics.newHashSet();
        return subTrees(nodes);
    }

    /**
     * Returns a list of all subtrees of this tree, one rooted at each node.
     * The subtrees are <i>not</i> copies: they share structure with this tree.
     * The tree counts as a subtree of itself.
     *
     * <i>Note:</i> If the list is only needed for iteration, it is more
     * efficient to use the Tree class's own {@code iterator()} method, which
     * visits the same elements.
     *
     * @return A new {@code List} of all subtrees in the tree
     */
    public List<Tree> subTreeList() {
        List<Tree> nodes = new ArrayList<>();
        return subTrees(nodes);
    }

    /**
     * Adds every subtree of this tree (the tree itself included) to the given
     * {@code Collection}.  Each node is added before the nodes below it.
     *
     * <i>Note:</i> If the collection is only needed for iteration, it is more
     * efficient to use the Tree class's own {@code iterator()} method.
     *
     * @param n The collection to which the subtrees are added
     * @param <T> The concrete collection type, returned unchanged
     * @return The same collection {@code n}, with the subtrees added
     */
    public <T extends Collection<Tree>> T subTrees(T n) {
        n.add(this);
        for (Tree child : children()) {
            child.subTrees(n);
        }
        return n;
    }

    /**
     * Makes a deep copy of both the tree structure and the labels.
     * Nodes are created with the TreeFactory of this (root) node, as given by
     * {@code treeFactory()}.  Labels must provide a non-null
     * {@code labelFactory()}.
     * (Added by Aria Haghighi.)
     *
     * @return A deep copy of the tree structure and its labels
     */
    public Tree deepCopy() {
        TreeFactory factory = treeFactory();
        return deepCopy(factory);
    }

    /**
     * Makes a deep copy of both the tree structure and the labels.
     * The nodes of the new tree are made by the given TreeFactory.  Labels
     * are copied with the {@code labelFactory()} of this node's label, so
     * this node's label must be non-null and must supply a non-null factory.
     * (Added by Aria Haghighi.)
     *
     * @param tf The TreeFactory used to make all nodes in the copied
     *           tree structure
     * @return A Tree that is a deep copy of the tree structure and
     *         Labels of the original tree
     */
    public Tree deepCopy(TreeFactory tf) {
        LabelFactory labelMaker = label().labelFactory();
        return deepCopy(tf, labelMaker);
    }

    /**
     * Makes a deep copy of both the tree structure and the labels.
     * Every node is created with the given TreeFactory and every label with
     * the given LabelFactory, so the two factories may change the nature of
     * the data representation.
     *
     * @param tf The TreeFactory used to make all nodes in the copied
     *           tree structure
     * @param lf The LabelFactory used to make all labels in the copied
     *           tree structure
     * @return A Tree that is a deep copy of the tree structure and
     *         Labels of the original tree
     */
    @SuppressWarnings({ "unchecked" })
    public Tree deepCopy(TreeFactory tf, LabelFactory lf) {
        Label copiedLabel = lf.newLabel(label());
        if (isLeaf()) {
            return tf.newLeaf(copiedLabel);
        }
        Tree[] daughters = children();
        // NB: deliberately a raw list -- the copies may be TreeGraphNode rather than Tree.
        List copiedDaughters = new ArrayList(daughters.length);
        for (Tree daughter : daughters) {
            copiedDaughters.add(daughter.deepCopy(tf, lf));
        }
        return tf.newTreeNode(copiedLabel, copiedDaughters);
    }

    /**
     * Creates a deep copy of the tree structure.  The whole structure is
     * copied recursively, but the label objects are shared, not cloned.
     * The copy is built with the {@code TreeFactory} returned by
     * {@code treeFactory()}, so it is a {@code Tree} like the input one.
     *
     * @return A deep copy of the tree structure (but not its labels)
     */
    public Tree treeSkeletonCopy() {
        TreeFactory factory = treeFactory();
        return treeSkeletonCopy(factory);
    }

    /**
     * Creates a deep copy of the tree structure.  The whole structure is
     * copied recursively, but the label objects are handed to the factory
     * as they are rather than being cloned here.  With an appropriate
     * {@code TreeFactory}, this method can be used to change the type of a
     * {@code Tree}.
     *
     * @param tf The {@code TreeFactory} to be used for creating
     *           the returned {@code Tree}
     * @return A deep copy of the tree structure (but not its labels)
     */
    public Tree treeSkeletonCopy(TreeFactory tf) {
        if (isLeaf()) {
            return tf.newLeaf(label());
        }
        Tree[] daughters = children();
        List<Tree> copies = new ArrayList<>(daughters.length);
        for (Tree daughter : daughters) {
            copies.add(daughter.treeSkeletonCopy(tf));
        }
        return tf.newTreeNode(label(), copies);
    }

    /**
     * Returns a deep copy of everything except the leaf labels, which are
     * reused from the original tree.  This is useful for cases such as the
     * dependency converter, which wants to end up with the same labels in
     * the dependencies as in the parse tree.  Nodes are made with
     * {@code treeFactory()} and interior labels with the
     * {@code labelFactory()} of this node's label.
     *
     * @return A copy of the tree that shares its leaf labels with this tree
     */
    public Tree treeSkeletonConstituentCopy() {
        TreeFactory factory = treeFactory();
        LabelFactory labelMaker = label().labelFactory();
        return treeSkeletonConstituentCopy(factory, labelMaker);
    }

    /**
     * Returns a deep copy of everything except the leaf labels.  Interior
     * labels are copied with the given LabelFactory; each leaf of the copy is
     * given the very label object of the corresponding original leaf.
     *
     * @param tf The TreeFactory used to make all nodes of the copy
     * @param lf The LabelFactory used to copy the labels of non-leaf nodes
     * @return A copy of the tree that shares its leaf labels with this tree
     */
    public Tree treeSkeletonConstituentCopy(TreeFactory tf, LabelFactory lf) {
        if (!isLeaf()) {
            Label copiedLabel = lf.newLabel(label());
            Tree[] daughters = children();
            List<Tree> copies = new ArrayList<>(daughters.length);
            for (Tree daughter : daughters) {
                copies.add(daughter.treeSkeletonConstituentCopy(tf, lf));
            }
            return tf.newTreeNode(copiedLabel, copies);
        }
        // Leaf: keep the current label object, so that trees based on the
        // tokens of a sentence still hold the same tokens after a "deep copy".
        // TODO: the LabeledScoredTreeFactory copies the label for a new
        // leaf.  Perhaps we could add a newLeafNoCopy or something like
        // that for efficiency.
        Tree leafCopy = tf.newLeaf(label());
        leafCopy.setLabel(label());
        return leafCopy;
    }

    /**
     * Creates a transformed Tree using the factory from {@code treeFactory()}.
     * The tree is traversed depth-first, left to right, and the
     * {@code TreeTransformer} is called on each node, returning some
     * {@code Tree}.  The transformed tree has a new tree structure (i.e., a
     * "deep copy" is done), but it will usually share its labels with the
     * original tree.
     *
     * @param transformer The function that transforms tree nodes or subtrees
     * @return A transformation of this {@code Tree}
     */
    public Tree transform(final TreeTransformer transformer) {
        final TreeFactory factory = treeFactory();
        return transform(transformer, factory);
    }

    /**
     * Creates a transformed Tree.  The tree is traversed depth-first, left to
     * right.  Each node is first rebuilt with the given factory from its
     * already-transformed children, and the {@code TreeTransformer} is then
     * called on that rebuilt node.  The result therefore has a new tree
     * structure (a deep copy of the structure is made), but it will usually
     * share its labels with the original tree.
     *
     * @param transformer The function that transforms tree nodes or subtrees
     * @param tf          The {@code TreeFactory} which will be used for creating
     *                    new nodes for the returned {@code Tree}
     * @return A transformation of this {@code Tree}
     */
    public Tree transform(final TreeTransformer transformer, final TreeFactory tf) {
        if (isLeaf()) {
            return transformer.transformTree(tf.newLeaf(label()));
        }
        Tree[] daughters = children();
        List<Tree> transformedDaughters = new ArrayList<>(daughters.length);
        for (Tree daughter : daughters) {
            transformedDaughters.add(daughter.transform(transformer, tf));
        }
        Tree rebuilt = tf.newTreeNode(label(), transformedDaughters);
        return transformer.transformTree(rebuilt);
    }

    /**
     * Creates a (partial) deep copy of the tree in which all nodes that the
     * filter does not accept are spliced out, using the factory from
     * {@code treeFactory()}.  If the result is not a tree (that is, it is a
     * forest), an empty root node is generated.
     *
     * @param nodeFilter A predicate which returns true to mean
     *                   keep this node, false to mean delete it
     * @return A filtered copy of the tree
     */
    public Tree spliceOut(final Predicate<Tree> nodeFilter) {
        final TreeFactory factory = treeFactory();
        return spliceOut(nodeFilter, factory);
    }

    /**
     * Creates a (partial) deep copy of the tree in which all nodes that the
     * filter does not accept are spliced out.  That is, the particular
     * nodes for which the predicate returns {@code false} are removed from
     * the {@code Tree}, but those nodes' children are kept (assuming they
     * pass the predicate themselves) and are added, in the appropriate
     * left-to-right order, as new children of the parent node.  If the root
     * node is deleted, so that the result would not be a tree (that is, it
     * is a forest), a new root node with a {@code null} label is generated.
     * If nothing is accepted, {@code null} is returned.
     *
     * @param nodeFilter A predicate which returns true to mean
     *                   keep this node, false to mean delete it
     * @param tf         A {@code TreeFactory} for making new trees
     * @return A filtered copy of the tree, or {@code null} if no node is kept
     */
    public Tree spliceOut(final Predicate<Tree> nodeFilter, final TreeFactory tf) {
        List<Tree> survivors = spliceOutHelper(nodeFilter, tf);
        switch (survivors.size()) {
            case 0:
                return null;
            case 1:
                return survivors.get(0);
            default:
                // a forest: hang the pieces under a new root
                return tf.newTreeNode((Label) null, survivors);
        }
    }

    /**
     * Recursive worker for {@code spliceOut}.  Returns the list of trees that
     * stand in for this node: a single copied node when the filter accepts
     * this node, otherwise the (already spliced) results of its children.
     *
     * @param nodeFilter A predicate which returns true to keep a node
     * @param tf         The factory used to build the copied nodes
     * @return The replacement trees for this node; possibly empty, never null
     */
    private List<Tree> spliceOutHelper(Predicate<Tree> nodeFilter, TreeFactory tf) {
        // Process the children first, collecting whatever survives below.
        List<Tree> keptBelow = new ArrayList<>();
        for (Tree child : children()) {
            keptBelow.addAll(child.spliceOutHelper(nodeFilter, tf));
        }
        if (!nodeFilter.test(this)) {
            // This node is spliced out: its surviving descendants move up.
            return keptBelow;
        }
        // This node is kept: rebuild it over the survivors, or as a leaf if none.
        Tree copy = keptBelow.isEmpty() ? tf.newLeaf(label()) : tf.newTreeNode(label(), keptBelow);
        List<Tree> single = new ArrayList<>(1);
        single.add(copy);
        return single;
    }

    /**
     * Creates a deep copy of the tree in which all nodes that the filter does
     * not accept, together with all children of such nodes, are pruned.  If
     * all of a node's children are pruned, that node is cut as well.
     * The filter is never called with a {@code null} argument by this method
     * itself.  Nodes are built with the factory from {@code treeFactory()}.
     * <br>
     * For example, the following code excises all PP nodes from a Tree:
     * <pre>{@code
     * Predicate<Tree> keep = t -> !t.label().value().equals("PP");
     * Tree pruned = tree.prune(keep);
     * }</pre>
     *
     * If the root of the tree is pruned, null will be returned.
     *
     * @param filter The filter to be applied; it returns true for nodes to keep
     * @return A filtered copy of the tree, including the possibility of
     *         {@code null} if the root node of the tree is filtered
     */
    public Tree prune(final Predicate<Tree> filter) {
        final TreeFactory factory = treeFactory();
        return prune(filter, factory);
    }

    /**
     * Creates a deep copy of the tree in which all nodes that the filter does
     * not accept, together with all children of such nodes, are pruned.  If
     * all of a node's children are pruned, that node is cut as well.
     * The filter is never called with a {@code null} argument by this method
     * itself.
     *
     * @param filter The filter to be applied; it returns true for nodes to keep
     * @param tf     The TreeFactory to be used to make the new Tree nodes
     * @return A filtered copy of the tree, including the possibility of
     *         {@code null} if the root node of the tree is filtered
     */
    public Tree prune(Predicate<Tree> filter, TreeFactory tf) {
        // A rejected node takes its whole subtree with it.
        if (!filter.test(this)) {
            return null;
        }
        Tree[] daughters = children();
        List<Tree> kept = new ArrayList<>();
        for (Tree daughter : daughters) {
            Tree prunedDaughter = daughter.prune(filter, tf);
            if (prunedDaughter != null) {
                kept.add(prunedDaughter);
            }
        }
        // A node that had children but lost every one of them is cut too.
        if (kept.isEmpty() && daughters.length != 0) {
            return null;
        }
        return isLeaf() ? tf.newLeaf(label()) : tf.newTreeNode(label(), kept);
    }

    /**
     * Returns the first child if this node is unary and its label value is
     * either "ROOT" or empty; otherwise returns this node.  A node whose
     * {@code label()} is {@code null}, or whose label has a {@code null}
     * value, counts as having an empty label.
     *
     * @return The first child if this is unary and if the label at the current
     *         node is either "ROOT" or empty, else this
     */
    public Tree skipRoot() {
        if (!isUnaryRewrite()) {
            return this;
        }
        // Use the null-safe value() accessor rather than label().value(), so a
        // node without a label (e.g. the artificial root that spliceOut builds
        // with a null label) does not cause a NullPointerException.
        String lab = value();
        boolean rootLike = lab == null || lab.isEmpty() || "ROOT".equals(lab);
        return rootLike ? firstChild() : this;
    }

    /**
     * Return a {@code TreeFactory} that produces trees of the
     * appropriate type.  This is one of the methods every concrete
     * subclass must implement; the copying and filtering methods of this
     * class use it as their default factory when none is passed in.
     *
     * @return A factory to produce Trees
     */
    public abstract TreeFactory treeFactory();

    /**
     * Return the parent of the tree node, for implementations that store it.
     * This base implementation does not know its parent and always throws
     * {@code UnsupportedOperationException}.  Subclasses that keep a parent
     * link are expected to override it (presumably returning {@code null}
     * for a root node -- confirm against the subclass in question).
     * Use {@link Tree#parent(Tree)} to find the parent by searching from a
     * root, which works regardless of the concrete class.
     *
     * @return The parent {@code Tree} node, in subclasses that support it
     * @throws UnsupportedOperationException Always, in this base implementation
     * @see Tree#parent(Tree)
     */
    public Tree parent() {
        throw new UnsupportedOperationException();
    }

    /**
     * Return the parent of the tree node, found by searching.  This routine
     * traverses the tree (depth first) from the given {@code root} and
     * finds the parent regardless of whether the concrete class stores
     * parents.  Nodes are matched by object identity (==).  It returns
     * {@code null} only if this node is the {@code root} node itself, or if
     * this node is not contained within the tree rooted at {@code root}.
     *
     * @param root The root node of the whole Tree
     * @return The parent {@code Tree} node if any;
     *         else {@code null}
     */
    public Tree parent(Tree root) {
        return parentHelper(root, root.children(), this);
    }

    /**
     * Searches for {@code node} among {@code kids} and, depth first, in the
     * subtrees below them.
     *
     * @param parent The node whose children are {@code kids}
     * @param kids   The children of {@code parent}
     * @param node   The node whose parent is wanted; compared by identity (==)
     * @return The parent of {@code node}, or {@code null} if it is not found
     */
    private static Tree parentHelper(Tree parent, Tree[] kids, Tree node) {
        for (int i = 0; i < kids.length; i++) {
            Tree candidate = kids[i];
            if (candidate == node) {
                return parent;
            }
            // Not a direct child: continue the search below this child.
            Tree found = node.parent(candidate);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Returns the number of nodes the tree contains.  This method
     * implements the {@code size()} function required by the
     * {@code Collection} interface.  Nodes of every kind are counted: this
     * node, all interior nodes below it, and the leaves.
     *
     * @return The size of the tree
     * @see #depth()
     */
    @Override
    public int size() {
        int total = 1; // this node
        for (Tree child : children()) {
            total += child.size();
        }
        return total;
    }

    /**
     * Return the ancestor tree node {@code height} nodes up from the current
     * node.  Each step up is found with {@link #parent(Tree)}, i.e. by
     * searching from {@code root}.
     *
     * @param height How many nodes up to go. A parameter of 0 means return
     *               this node, 1 means to return the parent node and so on.
     * @param root The root node that this Tree is embedded under
     * @return The ancestor at height {@code height}, or {@code null} if
     *         there is no such ancestor under {@code root}
     * @throws IllegalArgumentException If {@code height} is negative
     */
    public Tree ancestor(int height, Tree root) {
        if (height < 0) {
            throw new IllegalArgumentException("ancestor: height cannot be negative");
        }
        if (height == 0) {
            return this;
        }
        Tree up = parent(root);
        return (up == null) ? null : up.ancestor(height - 1, root);
    }

    /**
     * Iterator over the nodes of a tree.  It returns a node before the nodes
     * below it and visits children from left to right.  Removal is not
     * supported.
     */
    private static class TreeIterator implements Iterator<Tree> {

        /** Nodes still to be visited; the end of the list is the top of the stack. */
        private final List<Tree> pending;

        protected TreeIterator(Tree t) {
            pending = new ArrayList<>();
            pending.add(t);
        }

        @Override
        public boolean hasNext() {
            return !pending.isEmpty();
        }

        @Override
        public Tree next() {
            if (pending.isEmpty()) {
                throw new NoSuchElementException("TreeIterator exhausted");
            }
            Tree current = pending.remove(pending.size() - 1);
            Tree[] kids = current.children();
            // Push the children right to left, so that the leftmost child ends
            // up on top of the single list and is returned next.
            int k = kids.length;
            while (k > 0) {
                k--;
                pending.add(kids[k]);
            }
            return current;
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException Always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public String toString() {
            return "TreeIterator";
        }

    }

    /**
     * Returns an iterator over all the nodes of the tree.  This method
     * implements the {@code iterator()} method required by the
     * {@code Collection} interface.  The traversal is preorder: a node is
     * returned before its children.  (A possible extension to the class at
     * some point would be to allow different traversal orderings via variant
     * iterators.)
     *
     * @return An iterator over the nodes of the tree
     */
    @Override
    public Iterator<Tree> iterator() {
        Iterator<Tree> walker = new TreeIterator(this);
        return walker;
    }

    /**
     * Returns all nodes of the tree in postorder: every node comes after
     * all the nodes below it.
     *
     * @return A new list of the nodes of this tree in postorder
     */
    public List<Tree> postOrderNodeList() {
        List<Tree> visited = new ArrayList<>();
        postOrderRecurse(this, visited);
        return visited;
    }

    /**
     * Appends the nodes of {@code t} to {@code nodes} in postorder.
     *
     * @param t     The subtree to traverse
     * @param nodes The list that receives the nodes
     */
    private static void postOrderRecurse(Tree t, List<Tree> nodes) {
        Tree[] kids = t.children();
        for (int i = 0; i < kids.length; i++) {
            postOrderRecurse(kids[i], nodes);
        }
        // the node itself goes in after everything below it
        nodes.add(t);
    }

    /**
     * Returns all nodes of the tree in preorder: every node comes before
     * all the nodes below it.
     *
     * @return A new list of the nodes of this tree in preorder
     */
    public List<Tree> preOrderNodeList() {
        List<Tree> visited = new ArrayList<>();
        preOrderRecurse(this, visited);
        return visited;
    }

    /**
     * Appends the nodes of {@code t} to {@code nodes} in preorder.
     *
     * @param t     The subtree to traverse
     * @param nodes The list that receives the nodes
     */
    private static void preOrderRecurse(Tree t, List<Tree> nodes) {
        // the node itself goes in before everything below it
        nodes.add(t);
        Tree[] kids = t.children();
        for (int i = 0; i < kids.length; i++) {
            preOrderRecurse(kids[i], nodes);
        }
    }

    /**
     * Builds a tree from a String representation (a bracketed Tree, of the
     * kind produced by {@code toString()} or {@code pennPrint()}, or as in
     * the Penn Treebank).
     * It's not the most efficient thing to do for heavy duty usage.
     * The Tree returned is created by a
     * LabeledScoredTreeReaderFactory. This means that "standard"
     * normalizations (stripping functional categories, indices,
     * empty nodes, and A-over-A nodes) will be done on it.
     *
     * @param str The tree as a bracketed list in a String.
     * @return The Tree
     * @throws RuntimeException If Tree format is not valid
     */
    public static Tree valueOf(String str) {
        TreeReaderFactory readerFactory = new LabeledScoredTreeReaderFactory();
        return valueOf(str, readerFactory);
    }

    /**
     * Builds a tree from a String representation (a bracketed Tree, of the
     * kind produced by {@code toString()} or {@code pennPrint()}, or as in
     * the Penn Treebank).
     * It's not the most efficient thing to do for heavy duty usage.
     *
     * @param str The tree as a bracketed list in a String.
     * @param trf The TreeReaderFactory whose reader is used to parse the String
     * @return The first Tree read from the String
     * @throws RuntimeException If reading the tree fails with an
     *         {@code IOException}, which is kept as the cause
     */
    public static Tree valueOf(String str, TreeReaderFactory trf) {
        StringReader input = new StringReader(str);
        try {
            return trf.newTreeReader(input).readTree();
        } catch (IOException ioe) {
            throw new RuntimeException("Tree.valueOf() tree construction failed", ioe);
        }
    }

    /**
     * Return the child at some daughter index.  The children are numbered
     * starting with an index of 0.  An index outside the array of children
     * causes an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param i The daughter index
     * @return The tree at that daughter index
     */
    public Tree getChild(int i) {
        return children()[i];
    }

    /**
     * Destructively removes the child at some daughter index and returns it.
     * A new, shorter array of children is installed with {@code setChildren}.
     * Note that this method will throw an
     * {@link ArrayIndexOutOfBoundsException} if the daughter index is
     * outside the list of daughters.
     *
     * @param i The daughter index
     * @return The tree that was at that daughter index
     */
    public Tree removeChild(int i) {
        Tree[] kids = children();
        Tree removed = kids[i];
        Tree[] remaining = new Tree[kids.length - 1];
        // everything before i keeps its position; everything after moves down by one
        System.arraycopy(kids, 0, remaining, 0, i);
        System.arraycopy(kids, i + 1, remaining, i, remaining.length - i);
        setChildren(remaining);
        return removed;
    }

    /**
     * Adds the tree t at the index position among the daughters.  A new,
     * longer array of children is installed with {@code setChildren}.  Note
     * that this method will throw an {@link ArrayIndexOutOfBoundsException}
     * if the daughter index is too big for the list of daughters.
     *
     * @param i The index position at which to add the new daughter
     * @param t The new daughter
     */
    public void addChild(int i, Tree t) {
        Tree[] kids = children();
        Tree[] grown = new Tree[kids.length + 1];
        // A zero-length copy is a no-op, so inserting at either end needs no special case.
        System.arraycopy(kids, 0, grown, 0, i);
        grown[i] = t;
        System.arraycopy(kids, i, grown, i + 1, kids.length - i);
        setChildren(grown);
    }

    /**
     * Adds the tree t as the new last daughter, after all existing daughters.
     *
     * @param t The new daughter
     */
    public void addChild(Tree t) {
        int end = children().length;
        addChild(end, t);
    }

    /**
     * Replaces the {@code i}th child of {@code this} with the tree t.
     * The replacement is written directly into the array returned by
     * {@code children()}; {@code setChildren} is not called.
     * Note that this method will throw an
     * {@link ArrayIndexOutOfBoundsException} if the child index is too big
     * for the list of children.
     *
     * @param i The index position at which to replace the child
     * @param t The new child
     * @return The tree that was previously the ith daughter
     */
    public Tree setChild(int i, Tree t) {
        Tree[] daughters = children();
        Tree previous = daughters[i];
        daughters[i] = t;
        return previous;
    }

    /**
     * Returns true if {@code this} dominates the Tree passed in
     * as an argument.  Object equality (==) rather than .equals() is used
     * to determine domination.
     * t.dominates(t) returns false.
     *
     * @param t The candidate descendant
     * @return Whether {@code t} is a proper descendant of this node
     */
    public boolean dominates(Tree t) {
        List<Tree> path = dominationPath(t);
        if (path == null) {
            return false;
        }
        // a path of length one is just this node itself, which does not count
        return path.size() > 1;
    }

    /**
     * Returns the path of nodes leading down to a dominated node,
     * including {@code this} and the dominated node itself.
     * Returns null if t is not found at or below {@code this}.  Object
     * equality (==) is the relevant criterion.
     * t.dominationPath(t) returns a one-element list containing just t.
     *
     * @param t The node to find
     * @return The nodes from {@code this} down to {@code t}, as a fixed-size
     *         list backed by an array, or {@code null} if there is no path
     */
    public List<Tree> dominationPath(Tree t) {
        Tree[] nodes = dominationPath(t, 0);
        return (nodes == null) ? null : Arrays.asList(nodes);
    }

    /**
     * Looks for {@code t} below this node, trying the children from right
     * to left, and records this node in the path on the way back up.
     *
     * @param t     The node to find (by identity)
     * @param depth The index of this node within the path being built
     * @return The path array with positions {@code depth} and deeper filled
     *         in, or {@code null} if {@code t} was not found.  The search is
     *         abandoned with {@code null} as soon as a null child is met.
     */
    private Tree[] dominationPathHelper(Tree t, int depth) {
        Tree[] kids = children();
        for (int idx = kids.length - 1; idx >= 0; idx--) {
            Tree kid = kids[idx];
            if (kid == null) {
                return null;
            }
            Tree[] path = kid.dominationPath(t, depth + 1);
            if (path != null) {
                path[depth] = this;
                return path;
            }
        }
        return null;
    }

    /**
     * Builds the path from the original start node down to {@code t}.
     * When this node is {@code t}, the path array is allocated here, sized
     * to hold all nodes down to this depth, and the callers fill in the
     * shallower positions as the recursion unwinds.
     *
     * @param t     The node to find (by identity)
     * @param depth The index of this node within the path being built
     * @return The path array, or {@code null} if {@code t} is not at or
     *         below this node
     */
    private Tree[] dominationPath(Tree t, int depth) {
        if (this != t) {
            return dominationPathHelper(t, depth);
        }
        Tree[] path = new Tree[depth + 1];
        path[depth] = this;
        return path;
    }

    /**
     * Given nodes {@code t1} and {@code t2} which are
     * dominated by this node, returns a list of all the nodes on the
     * path from t1 to t2, inclusive, or null if none found.  The path runs
     * up from {@code t1} to the join node of the two and then down to
     * {@code t2}.
     *
     * @param t1 The node at which the path starts
     * @param t2 The node at which the path ends
     * @return The nodes on the path, or {@code null} if either node is not
     *         contained in this tree or no path can be built
     */
    public List<Tree> pathNodeToNode(Tree t1, Tree t2) {
        if (!(contains(t1) && contains(t2))) {
            return null;
        }
        if (t1 == t2) {
            return Collections.singletonList(t1);
        }
        if (t1.dominates(t2)) {
            // straight down from t1
            return t1.dominationPath(t2);
        }
        if (t2.dominates(t1)) {
            // straight up from t1: the downward path from t2, reversed
            List<Tree> upward = t2.dominationPath(t1);
            Collections.reverse(upward);
            return upward;
        }
        Tree meet = joinNode(t1, t2);
        if (meet == null) {
            return null;
        }
        List<Tree> downToT1 = meet.dominationPath(t1);
        List<Tree> downToT2 = meet.dominationPath(t2);
        if (downToT1 == null || downToT2 == null) {
            return null;
        }
        // Go up from t1 to the join node, then down to t2.  The join node heads
        // both halves, so it is dropped from the first one.
        ArrayList<Tree> path = new ArrayList<>(downToT1);
        Collections.reverse(path);
        path.remove(meet);
        path.addAll(downToT2);
        return path;
    }

    /**
     * Given nodes {@code t1} and {@code t2} which are
     * dominated by this node, returns their "join node": the node
     * {@code j} such that {@code j} dominates both
     * {@code t1} and {@code t2}, and every other node which
     * dominates both {@code t1} and {@code t2}
     * dominates {@code j}.
     * In the special case that t1 dominates t2, return t1, and vice versa.
     * Return {@code null} if no such node can be found.
     *
     * @param t1 The first node
     * @param t2 The second node
     * @return The join node, or {@code null} if there is none
     */
    public Tree joinNode(Tree t1, Tree t2) {
        if (!(contains(t1) && contains(t2))) {
            return null;
        }
        if (this == t1 || this == t2) {
            return this;
        }
        List<Tree> pathToT1 = dominationPath(t1);
        List<Tree> pathToT2 = dominationPath(t2);
        if (pathToT1 == null || pathToT2 == null) {
            return null;
        }
        // Walk both paths from the top together; the join node is the last
        // node they have in common.
        int shared = Math.min(pathToT1.size(), pathToT2.size());
        Tree deepestCommon = null;
        for (int i = 0; i < shared; i++) {
            Tree onFirst = pathToT1.get(i);
            if (onFirst != pathToT2.get(i)) {
                break;
            }
            deepestCommon = onFirst;
        }
        return deepestCommon;
    }

    /**
     * Given nodes {@code t1} and {@code t2} which are
     * dominated by this node, returns {@code true} iff
     * {@code t1} c-commands {@code t2}.  (A node c-commands
     * its sister(s) and any nodes below its sister(s).)
     *
     * @param t1 The potential c-commander
     * @param t2 The potentially c-commanded node
     * @return Whether some sibling of {@code t1} is, or contains, {@code t2};
     *         {@code false} if {@code t1} has no parent under this node
     */
    public boolean cCommands(Tree t1, Tree t2) {
        List<Tree> sisters = t1.siblings(this);
        if (sisters == null) {
            return false;
        }
        return sisters.stream().anyMatch(sister -> sister == t2 || sister.contains(t2));
    }

    /**
     * Returns the siblings of this Tree node.  The siblings are all
     * children of the parent of this node except this node.  This node is
     * recognized by object identity (==), as in {@link #parent(Tree)}, so a
     * different sibling that merely {@code equals()} this node stays in
     * the result.
     *
     * @param root The root within which this tree node is contained
     * @return The siblings as a list, which is empty if this node is an only
     *   child, or {@code null} if no parent of this node is found under
     *   {@code root} (for instance when this node is {@code root}).
     *   The list comes from the parent's {@code getChildrenAsList()} and
     *   contains the actual children.
     */
    public List<Tree> siblings(Tree root) {
        Tree parent = parent(root);
        if (parent == null) {
            return null;
        }
        List<Tree> siblings = parent.getChildrenAsList();
        // List.remove(Object) matches with equals(), which could take out a
        // different but equal sibling and leave this node in the list.
        // Drop exactly this node instead.
        for (Iterator<Tree> it = siblings.iterator(); it.hasNext(); ) {
            if (it.next() == this) {
                it.remove();
                break;
            }
        }
        return siblings;
    }

    /**
     * Insert {@code dtr} after {@code position} existing
     * daughters in {@code this}.  A new, longer array of children is
     * installed with {@code setChildren}.  A negative {@code position}
     * is treated like 0, i.e. {@code dtr} becomes the first daughter.
     *
     * @param dtr      The daughter to insert
     * @param position The number of existing daughters that precede {@code dtr}
     * @throws IllegalArgumentException If {@code position} is greater than
     *         the number of existing daughters
     */
    public void insertDtr(Tree dtr, int position) {
        Tree[] kids = children();
        if (position > kids.length) {
            throw new IllegalArgumentException("Can't insert tree after the " + position + "th daughter in " + this
                    + "; only " + kids.length + " daughters exist!");
        }
        // negative positions insert at the front
        int at = Math.max(position, 0);
        Tree[] grown = new Tree[kids.length + 1];
        System.arraycopy(kids, 0, grown, 0, at);
        grown[at] = dtr;
        System.arraycopy(kids, at, grown, at + 1, kids.length - at);
        setChildren(grown);
    }

    // --- composition methods to implement Label interface

    /**
     * Returns the value of this node's label.
     *
     * @return the label's value, or {@code null} if this node has no label
     */
    @Override
    public String value() {
        final Label nodeLabel = label();
        return (nodeLabel == null) ? null : nodeLabel.value();
    }

    /**
     * Sets the value of this node's label.  Does nothing when the node has
     * no label.
     *
     * @param value the new value for the label
     */
    @Override
    public void setValue(String value) {
        final Label nodeLabel = label();
        if (nodeLabel == null) {
            return;
        }
        nodeLabel.setValue(value);
    }

    /**
     * Passes {@code labelStr} on to {@code setFromString} of this node's
     * label.  Does nothing when the node has no label.
     *
     * @param labelStr the string handed to the label
     */
    @Override
    public void setFromString(String labelStr) {
        final Label nodeLabel = label();
        if (nodeLabel == null) {
            return;
        }
        nodeLabel.setFromString(labelStr);
    }

    /**
     * Returns the label factory reported by this node's label.
     *
     * @return the label's {@code LabelFactory}, or {@code null} if this node
     *     has no label (the label itself may also report {@code null})
     */
    @Override
    public LabelFactory labelFactory() {
        final Label nodeLabel = label();
        return (nodeLabel == null) ? null : nodeLabel.labelFactory();
    }

    /**
     * Returns the positional index of the left edge of <i>node</i> within the tree,
     * as measured by characters: the summed length of the values of all leaves
     * visited before <i>node</i> is reached.  Returns -1 if <i>node</i> is not found.
     * Note: These methods were written for internal evaluation routines. They are
     * not the right methods to relate tree nodes to textual offsets. For these,
     * look at the appropriate annotations on a CoreLabel (CharacterOffsetBeginAnnotation, etc.).
     *
     * @param node The subtree to look for in this Tree (matched by identity)
     * @return The positional index of the left edge of node, or -1
     */
    public int leftCharEdge(Tree node) {
        MutableInteger offset = new MutableInteger(0);
        return leftCharEdge(node, offset) ? offset.intValue() : -1;
    }

    /**
     * Recursive worker for {@link #leftCharEdge(Tree)}: walks this subtree
     * from left to right and adds the length of every leaf value it passes
     * to {@code offset} until {@code node} is met.
     *
     * @param node the node being searched for (compared by identity)
     * @param offset running character count, updated in place
     * @return {@code true} as soon as {@code node} has been reached
     */
    private boolean leftCharEdge(Tree node, MutableInteger offset) {
        if (this == node) {
            return true;
        }
        if (isLeaf()) {
            // A leaf that is not the target lies to the left of it.
            offset.set(offset.intValue() + value().length());
            return false;
        }
        for (Tree kid : children()) {
            if (kid.leftCharEdge(node, offset)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the positional index of the right edge of <i>node</i> within the tree,
     * as measured by characters. Returns -1 if <i>node</i> is not found.
     *
     * rightCharEdge returns the index of the rightmost character + 1, so that
     * rightCharEdge(getLeaves().get(i)) == leftCharEdge(getLeaves().get(i+1))
     *
     * Note: These methods were written for internal evaluation routines. They are
     * not the right methods to relate tree nodes to textual offsets. For these,
     * look at the appropriate annotations on a CoreLabel (CharacterOffsetBeginAnnotation, etc.).
     *
     * @param node The subtree to look for in this Tree
     * @return The positional index of the right edge of node
     */
    public int rightCharEdge(Tree node) {
        // Start from the total character count of the yield; the recursive
        // search then subtracts every leaf lying to the right of node.
        List<Tree> leaves = getLeaves();
        int totalChars = leaves.stream()
                .mapToInt(leaf -> leaf.label().value().length())
                .sum();
        MutableInteger edge = new MutableInteger(totalChars);
        return rightCharEdge(node, edge) ? edge.intValue() : -1;
    }

    /**
     * Recursive worker for {@link #rightCharEdge(Tree)}: walks this subtree
     * from right to left and subtracts the length of every leaf value it
     * passes from {@code edge} until {@code node} is met.
     *
     * @param node the node being searched for (compared by identity)
     * @param edge running character position, updated in place
     * @return {@code true} as soon as {@code node} has been reached
     */
    private boolean rightCharEdge(Tree node, MutableInteger edge) {
        if (this == node) {
            return true;
        }
        if (isLeaf()) {
            // A leaf that is not the target lies to the right of it.
            edge.set(edge.intValue() - label().value().length());
            return false;
        }
        Tree[] kids = children();
        for (int k = kids.length - 1; k >= 0; k--) {
            if (kids[k].rightCharEdge(node, edge)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Calculates the node's <i>number</i>, defined as the number of nodes traversed in a left-to-right,
     * depth-first search of the tree starting at {@code root} and ending at {@code this}; the root
     * itself therefore has number 1.  Returns -1 if {@code root} does not contain {@code this}.
     *
     * @param root the root node of the relevant tree
     * @return the number of the current node, or -1 if {@code root} does not contain {@code this}.
     */
    public int nodeNumber(Tree root) {
        MutableInteger counter = new MutableInteger(1);
        return nodeNumberHelper(root, counter) ? counter.intValue() : -1;
    }

    /**
     * Recursive worker for {@link #nodeNumber(Tree)}: searches the subtree
     * rooted at {@code t} for this node (by identity), bumping
     * {@code counter} once for every visited node that is not it.
     *
     * @param t the subtree currently being searched
     * @param counter running node count, updated in place
     * @return {@code true} as soon as this node has been reached
     */
    private boolean nodeNumberHelper(Tree t, MutableInteger counter) {
        if (this == t) {
            return true;
        }
        counter.incValue(1);
        Tree[] kids = t.children();
        for (int k = 0; k < kids.length; k++) {
            if (nodeNumberHelper(kids[k], counter)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Fetches the {@code i}th node in the tree, with node numbers defined
     * as in {@link #nodeNumber(Tree)}.
     *
     * @param i the node number to fetch
     * @return the {@code i}th node in the tree; never {@code null}
     * @throws IndexOutOfBoundsException if {@code i} is not between 1 and
     *    the number of nodes (inclusive) contained in {@code this}.
     */
    public Tree getNodeNumber(int i) {
        Tree found = getNodeNumberHelper(new MutableInteger(1), i);
        if (found == null) {
            // The search ran out of nodes before the counter reached i; the helper
            // signals that with null, so raise the documented exception here.
            throw new IndexOutOfBoundsException("Error -- tree does not contain " + i + " nodes.");
        }
        return found;
    }

    /**
     * Recursive worker for {@link #getNodeNumber(int)}.  On entry
     * {@code i} holds the number of the node this call is looking at; it is
     * incremented once for every node visited that is not the target.
     *
     * @param i running node counter, updated in place
     * @param target the node number being searched for
     * @return the node numbered {@code target}, or {@code null} if this
     *     subtree is exhausted before the counter reaches {@code target}
     * @throws IndexOutOfBoundsException if the counter is already past
     *     {@code target} when this node is visited
     */
    private Tree getNodeNumberHelper(MutableInteger i, int target) {
        int current = i.intValue();
        if (current == target) {
            return this;
        }
        if (current > target) {
            // Report the requested number, not the running counter.
            throw new IndexOutOfBoundsException("Error -- tree does not contain a node numbered " + target + ".");
        }
        i.incValue(1);
        for (Tree kid : children()) {
            Tree found = kid.getNodeNumberHelper(i, target);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Assign sequential integer indices to the leaves of the tree
     * rooted at this {@code Tree}, starting with 1.
     * The leaves are traversed from left to right. A leaf that already
     * carries a non-negative index keeps it.
     * Only leaves whose label implements {@code HasIndex} are indexed.
     */
    public void indexLeaves() {
        final int firstIndex = 1;
        final boolean overWrite = false;
        indexLeaves(firstIndex, overWrite);
    }

    /**
     * Index the leaves starting with 1, and optionally overwrite indices
     * that the leaves already carry.
     *
     * @param overWrite Whether to replace an existing index for a leaf.
     */
    public void indexLeaves(boolean overWrite) {
        final int firstIndex = 1;
        indexLeaves(firstIndex, overWrite);
    }

    /**
     * Assign sequential integer indices to the leaves of the subtree
     * rooted at this {@code Tree}, beginning with
     * {@code startIndex}, and traversing the leaves from left
     * to right. A leaf that already has a non-negative index keeps it
     * unless {@code overWrite} is set, and numbering continues from that
     * existing index.
     * Only leaves whose label implements {@code HasIndex} take part; any
     * other leaf is skipped without using up an index.
     *
     * @param startIndex index for the first leaf of this subtree
     * @param overWrite Whether to replace an existing index for a leaf.
     * @return the next index still unassigned
     */
    public int indexLeaves(int startIndex, boolean overWrite) {
        if (!isLeaf()) {
            int next = startIndex;
            for (Tree kid : children()) {
                next = kid.indexLeaves(next, overWrite);
            }
            return next;
        }

        Label leafLabel = label();
        if (!(leafLabel instanceof HasIndex)) {
            return startIndex;
        }

        HasIndex indexed = (HasIndex) leafLabel;
        int existing = indexed.index();
        int assigned;
        if (overWrite || existing < 0) {
            indexed.setIndex(startIndex);
            assigned = startIndex;
        } else {
            // Keep the index that is already there and carry on counting from it.
            assigned = existing;
        }
        return assigned + 1;
    }

    /**
     * Percolates terminal indices through a dependency tree. The terminals should be indexed, e.g.,
     * by calling indexLeaves() on the tree.
     * <p>
     * Labels are cast to {@code HasIndex} and {@code HasWord} without a
     * check, so every label in the tree must implement both (as a
     * CoreLabel is assumed to).
     */
    public void percolateHeadIndices() {
        if (isPreTerminal()) {
            // A preterminal takes over the index of the word directly below it.
            int wordIndex = ((HasIndex) firstChild().label()).index();
            ((HasIndex) label()).setIndex(wordIndex);
            return;
        }

        // The surface form of this node: its word, or its value when no word is set.
        String headWord = ((HasWord) label()).word();
        if (headWord == null) {
            headWord = value();
        }

        // The first child sharing that surface form supplies the head index.
        // A head may have the same surface form as one of its dependents, in
        // which case this choice is ambiguous.
        boolean headAssigned = false;
        for (Tree kid : children()) {
            kid.percolateHeadIndices();
            String kidWord = ((HasWord) kid.label()).word();
            if (kidWord == null) {
                kidWord = kid.value();
            }
            if (headAssigned || !headWord.equals(kidWord)) {
                continue;
            }
            headAssigned = true;
            int headIndex = ((HasIndex) kid.label()).index();
            ((HasIndex) label()).setIndex(headIndex);
        }
    }

    /**
     * Index all spans (constituents) in the tree, numbering from 0.
     * Spans use 0-based indexing; a span records the fencepost to the left
     * of its first word and the one after its last word.
     * Spans are only recorded on nodes whose label is a {@code CoreMap}.
     */
    public void indexSpans() {
        final int firstFencepost = 0;
        indexSpans(firstFencepost);
    }

    /**
     * Index all spans (constituents) in the tree, giving the first leaf the
     * begin index {@code startIndex}.
     *
     * @param startIndex Begin indexing at this value
     */
    public void indexSpans(int startIndex) {
        MutableInteger counter = new MutableInteger(startIndex);
        indexSpans(counter);
    }

    /**
     * Assigns span indices (BeginIndexAnnotation and EndIndexAnnotation) to all nodes in a tree.
     * Leaves are numbered consecutively from the value held by {@code startIndex}, which is
     * advanced by one per leaf. A leaf's span is its own number and that number plus one;
     * an internal node's span runs from the smallest begin to the largest end among its children.
     * The annotations are only written on nodes whose label is a {@code CoreMap}.
     *
     * @param startIndex Begin indexing at this value; updated in place
     * @return the (begin, end) span of this node
     */
    public Pair<Integer, Integer> indexSpans(MutableInteger startIndex) {
        int left;
        int right;

        if (isLeaf()) {
            left = startIndex.intValue();
            right = left + 1;
            startIndex.incValue(1);
        } else {
            left = Integer.MAX_VALUE;
            right = Integer.MIN_VALUE;
            for (Tree kid : children()) {
                Pair<Integer, Integer> kidSpan = kid.indexSpans(startIndex);
                left = Math.min(left, kidSpan.first);
                right = Math.max(right, kidSpan.second);
            }
        }

        Label nodeLabel = label();
        if (nodeLabel instanceof CoreMap) {
            CoreMap annotations = (CoreMap) nodeLabel;
            annotations.set(CoreAnnotations.BeginIndexAnnotation.class, left);
            annotations.set(CoreAnnotations.EndIndexAnnotation.class, right);
        }
        return new Pair<>(left, right);
    }

}