edu.stanford.nlp.trees.Constituent.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.trees.Constituent.java

Source

package edu.stanford.nlp.trees;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.util.Scored;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;

/**
 * A <code>Constituent</code> object defines a generic edge in a graph,
 * identified by a start node, an end node and, optionally, a
 * <code>Label</code> and a score.
 * The <code>Constituent</code> class is designed to be extended: the span
 * accessors are abstract, while the label and score accessors have
 * do-nothing defaults (<code>null</code> label, <code>NaN</code> score)
 * that subclasses may override.
 * <p>
 * The <code>Constituent</code> class implements most
 * of the functionality of the <code>Label</code>
 * interface by passing all requests down to the <code>Label</code> which
 * might be contained in the <code>Constituent</code>.  This allows one
 * to put a <code>Constituent</code> anywhere that a <code>Label</code> is
 * required.  A <code>Constituent</code> is always <code>Scored</code>.
 * <p>
 * Note: this class does not itself declare <code>Comparable</code>; any
 * ordering of constituents has to be supplied by a subclass or an
 * external comparator.
 *
 * @author Christopher Manning
 */
public abstract class Constituent implements Labeled, Scored, Label {

    /**
     * Creates a constituent; there is no state to initialise at this level.
     */
    public Constituent() {
    }

    /**
     * Access the start node.
     *
     * @return the index of the start node
     */
    public abstract int start();

    /**
     * Set the start node.
     *
     * @param start the index of the start node
     */
    public abstract void setStart(int start);

    /**
     * Access the end node.
     *
     * @return the index of the end node
     */
    public abstract int end();

    /**
     * Set the end node.
     *
     * @param end the index of the end node
     */
    public abstract void setEnd(int end);

    /**
     * Access the label.  This base implementation stores no label.
     *
     * @return always <code>null</code> here; subclasses may return a label
     */
    public Label label() {
        return null;
    }

    /**
     * Sets the label associated with the current Constituent,
     * if there is one.  This base implementation ignores the argument.
     *
     * @param label the label to store (ignored here)
     */
    public void setLabel(Label label) {
        // a no-op: the base class stores no label
    }

    /**
     * Access labels -- actually always a singleton here.
     *
     * @return an immutable one-element list holding {@link #label()}
     *         (the element is <code>null</code> if there is no label)
     */
    public Collection<Label> labels() {
        return Collections.singletonList(label());
    }

    /**
     * Unsupported: a Constituent carries at most one label.
     *
     * @param labels ignored
     * @throws UnsupportedOperationException always
     */
    public void setLabels(Collection<Label> labels) {
        throw new UnsupportedOperationException("Constituent can't be multilabeled");
    }

    /**
     * Access the score.  This base implementation stores no score.
     *
     * @return always <code>Double.NaN</code> here; subclasses may return a score
     */
    public double score() {
        return Double.NaN;
    }

    /**
     * Sets the score associated with the current node, if there is one.
     * This base implementation ignores the argument.
     *
     * @param score the score to store (ignored here)
     */
    public void setScore(double score) {
        // a no-op: the base class stores no score
    }

    /**
     * Return a string representation of a <code>Constituent</code>:
     * the label's string form (if there is a label) followed by
     * <code>(start,end)</code>.
     *
     * @return The full string representation.
     */
    @Override
    public String toString() {
        // StringBuilder suffices: the buffer never escapes this method.
        StringBuilder sb = new StringBuilder();
        Label lab = label();
        if (lab != null) {
            sb.append(lab.toString());
        }
        sb.append("(").append(start()).append(",").append(end()).append(")");
        return sb.toString();
    }

    /**
     * Return the length of a <code>Constituent</code>, computed as
     * <code>end() - start()</code>.
     *
     * @return the difference between the end and start node indices
     */
    public int size() {
        return end() - start();
    }

    /**
     * Compare with another Object for equality.
     * Two Constituent objects are equal if they have the same start and end,
     * and, if at least one of them has a non-null label, then their labels are equal.
     * Labels are compared through their <code>value()</code> strings; a
     * constituent with a label is never equal to one without a label.
     * The score of a Constituent is not considered in the equality test.
     * This seems to make sense for most of the applications we have in mind
     * where one wants to assess equality independent of score, and then if
     * necessary to relax a constituent if one with a better score is found.
     * (Note, however, that if you do want to compare Constituent scores for
     * equality, then you have to be careful,
     * because two <code>double</code> NaN values are considered unequal in
     * Java.)
     * The general contract of equals() implies that one can't have a
     * subclass of a concrete [non-abstract] class redefine equals() to use
     * extra aspects, so subclasses shouldn't override this in ways that
     * make use of extra fields.
     *
     * @param obj The object being compared with
     * @return true if the objects are equal
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Constituent)) {
            return false;
        }
        Constituent c = (Constituent) obj;
        if (start() != c.start() || end() != c.end()) {
            return false;
        }
        Label lab1 = label();
        Label lab2 = c.label();
        if (lab1 == null || lab2 == null) {
            // Equal only when both are unlabeled.  Checking lab2 here as well
            // avoids a NullPointerException when only the other label is missing.
            return lab1 == lab2;
        }
        String lv1 = lab1.value();
        String lv2 = lab2.value();
        // Both null -> equal; exactly one null -> unequal; otherwise compare strings.
        return (lv1 == null) ? (lv2 == null) : lv1.equals(lv2);
    }

    /**
     * A hashCode for Constituents done by shifting and or'ing for speed.
     * Now includes the label if the constituent has one (otherwise things
     * would work very badly if you were hashing constituents over the
     * same span....).
     *
     * @return the integer hashCode
     */
    @Override
    public int hashCode() {
        int hash = (start() << 16) | end();
        Label lab = label();
        if (lab == null) {
            return hash;
        }
        String labelValue = lab.value();
        return (labelValue == null) ? hash : hash ^ labelValue.hashCode();
    }

    /**
     * Detects whether this constituent overlaps a constituent without
     * nesting, that is, whether they "cross".
     *
     * @param c The constituent to check against
     * @return True if the two constituents cross
     */
    public boolean crosses(Constituent c) {
        return (start() < c.start() && c.start() < end() && end() < c.end())
                || (c.start() < start() && start() < c.end() && c.end() < end());
    }

    /**
     * Detects whether this constituent overlaps any of a Collection of
     * Constituents without
     * nesting, that is, whether it "crosses" any of them.
     *
     * @param constColl The set of constituent to check against
     * @return True if some constituent in the collection is crossed
     * @throws ClassCastException If some member of the Collection isn't
     *                            a Constituent (only possible when a raw
     *                            or unchecked collection is passed in)
     */
    public boolean crosses(Collection<Constituent> constColl) {
        for (Constituent c : constColl) {
            if (crosses(c)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Detects whether this constituent contains a constituent, that is
     * whether they are nested.  That is, the other constituent's yield is
     * a sublist of this constituent's yield.
     *
     * @param c The constituent to check against
     * @return True if the other Constituent is contained in this one
     */
    public boolean contains(Constituent c) {
        return start() <= c.start() && end() >= c.end();
    }

    // -- below here is stuff to implement the Label interface

    /**
     * Return the value of the label (or null if none).
     *
     * @return String the value for the label
     */
    public String value() {
        Label lab = label();
        if (lab == null) {
            return null;
        }
        return lab.value();
    }

    /**
     * Set the value for the label (if one is stored).
     * Does nothing when there is no label.
     *
     * @param value The value for the label
     */
    public void setValue(String value) {
        Label lab = label();
        if (lab != null) {
            lab.setValue(value);
        }
    }

    /**
     * Make a new label with this <code>String</code> as the "name", perhaps
     * by doing some appropriate decoding of the string.
     * The request is forwarded to the stored label; it is ignored when
     * there is no label.
     *
     * @param labelStr the String that translates into the content of the
     *                 label
     */
    public void setFromString(String labelStr) {
        Label lab = label();
        if (lab != null) {
            lab.setFromString(labelStr);
        }
    }

    /**
     * Print out as a string the subpart of a sentence covered
     * by this <code>Constituent</code>.  The elements at indices
     * <code>start()</code> through <code>end()</code> <em>inclusive</em>
     * are joined with single spaces.  (Note that this treats
     * <code>end()</code> as an inclusive index, whereas {@link #size()}
     * returns <code>end() - start()</code>.)
     *
     * @param s the sentence, as a list of words (any element type; each
     *          element is rendered with <code>String.valueOf</code> semantics)
     * @return The subpart of the sentence
     * @throws IndexOutOfBoundsException if the span reaches outside <code>s</code>
     */
    public String toSentenceString(ArrayList<?> s) {
        StringBuilder sb = new StringBuilder();
        for (int wordNum = start(), end = end(); wordNum <= end; wordNum++) {
            sb.append(s.get(wordNum));
            if (wordNum != end) {
                sb.append(" ");
            }
        }
        return sb.toString();
    }

}