Source code

Java tutorial


Here is the source code for edu/stanford/nlp/semgraph/SemanticGraph.java


package edu.stanford.nlp.semgraph;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.stanford.nlp.graph.DirectedMultiGraph;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MapFactory;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringParsingTask;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;

import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT;

// todo [cdm 2013]: The treatment of roots in this class should probably be redone.
// todo [cdm 2013]: Probably we should put fake root node in graph and arc(s) from it.
// todo [cdm 2013]: At any rate, printing methods should print the root

/**
 * Represents a semantic graph of a sentence or document, with IndexedWord
 * objects for nodes.
 * <p>
 * Notes:
 * <p>
 * The root is not at present represented as a vertex in the graph.
 * At present you need to get a root/roots
 * from the separate roots variable and to know about it.
 * This should maybe be changed, because otherwise, doing things like
 * simply getting the set of nodes or edges from the graph doesn't give
 * you root nodes or edges.
 * <p>
 * Given the kinds of representations that we normally use with
 * typedDependenciesCollapsed, there can be (small) cycles in a
 * SemanticGraph, and these cycles may involve the node that is conceptually the
 * root of the graph, so there may be no node without a parent node. You can
 * better get at the root(s) via the variable and methods provided.
 * <p>
 * There is no mechanism for returning all edges at once (e.g., {@code edgeSet()}).
 * This is intentional.  Use {@code edgeIterable()} to iterate over the edges if necessary.
 *
 * @author Christopher Cox
 * @author Teg Grenager
 * @see SemanticGraphEdge
 * @see IndexedWord
 */
public class SemanticGraph implements Serializable {

    /** A logger for this class */
    private static final Redwood.RedwoodChannels log = Redwood.channels(SemanticGraph.class);

    public static final boolean addSRLArcs = false;

    private static final SemanticGraphFormatter formatter = new SemanticGraphFormatter();

    /** The distinguished root vertices, if known. */
    private final Collection<IndexedWord> roots;

    private final DirectedMultiGraph<IndexedWord, SemanticGraphEdge> graph;

    private static final MapFactory<IndexedWord, Map<IndexedWord, List<SemanticGraphEdge>>> outerMapFactory = MapFactory
    private static final MapFactory<IndexedWord, List<SemanticGraphEdge>> innerMapFactory = MapFactory
    private static final MapFactory<IndexedWord, IndexedWord> wordMapFactory = MapFactory.hashMapFactory();

    private LinkedList<String> comments = new LinkedList<>();

    public int edgeCount() {
        return graph.getNumEdges();

    public int outDegree(IndexedWord vertex) {
        return graph.getOutDegree(vertex);

    public int inDegree(IndexedWord vertex) {
        return graph.getInDegree(vertex);

    public List<SemanticGraphEdge> getAllEdges(IndexedWord gov, IndexedWord dep) {
        return graph.getEdges(gov, dep);

    // TODO: this is a bad method to use because there can be multiple
    // edges.  All users of this method should be switched to iterating
    // over getAllEdges.  This has already been done for all uses
    // outside RTE.
    public SemanticGraphEdge getEdge(IndexedWord gov, IndexedWord dep) {
        List<SemanticGraphEdge> edges = graph.getEdges(gov, dep);
        if (edges == null || edges.isEmpty())
            return null;
        return edges.get(0);

    public void addVertex(IndexedWord vertex) {

    public boolean containsVertex(IndexedWord vertex) {
        return graph.containsVertex(vertex);

    public boolean containsEdge(IndexedWord source, IndexedWord target) {
        return graph.isEdge(source, target);

    public boolean containsEdge(SemanticGraphEdge edge) {
        return containsEdge(edge.getSource(), edge.getTarget());

    public Set<IndexedWord> vertexSet() {
        return graph.getAllVertices();

    public boolean removeEdge(SemanticGraphEdge e) {
        return graph.removeEdge(e.getSource(), e.getTarget(), e);

    public boolean removeVertex(IndexedWord vertex) {
        return graph.removeVertex(vertex);

    /**
     * Returns an ordered list of vertices (based upon their
     * indices in the sentence). This creates and sorts a list, so
     * prefer vertexSet unless you have a good reason to want nodes in
     * index order.
     *
     * @return Ordered list of vertices
     */
    public List<IndexedWord> vertexListSorted() {
        ArrayList<IndexedWord> vList = new ArrayList<>(vertexSet());
        return vList;

    /**
     * Returns an ordered list of edges in the graph.
     * This creates and sorts a list, so prefer edgeIterable().
     *
     * @return An ordered list of edges in the graph.
     */
    public List<SemanticGraphEdge> edgeListSorted() {
        ArrayList<SemanticGraphEdge> edgeList = new ArrayList<>();
        for (SemanticGraphEdge edge : edgeIterable()) {
        return edgeList;

    public Iterable<SemanticGraphEdge> edgeIterable() {
        return graph.edgeIterable();

    public Iterator<SemanticGraphEdge> outgoingEdgeIterator(IndexedWord v) {
        return graph.outgoingEdgeIterator(v);

    public Iterable<SemanticGraphEdge> outgoingEdgeIterable(IndexedWord v) {
        return graph.outgoingEdgeIterable(v);

    public Iterator<SemanticGraphEdge> incomingEdgeIterator(IndexedWord v) {
        return graph.incomingEdgeIterator(v);

    public Iterable<SemanticGraphEdge> incomingEdgeIterable(IndexedWord v) {
        return graph.incomingEdgeIterable(v);

    public List<SemanticGraphEdge> outgoingEdgeList(IndexedWord v) {
        return CollectionUtils.toList(outgoingEdgeIterable(v));

    public List<SemanticGraphEdge> incomingEdgeList(IndexedWord v) {
        return CollectionUtils.toList(incomingEdgeIterable(v));

    public boolean isEmpty() {
        return graph.isEmpty();

    /**
     * Searches up to 2 levels to determine how far ancestor is from child (i.e.,
     * returns 1 if "ancestor" is a parent, or 2 if ancestor is a grandparent).
     *
     * @param child
     *          candidate child
     * @param ancestor
     *          candidate ancestor
     * @return the number of generations between "child" and "ancestor" (1 is an
     *         immediate parent), or -1 if there is no relationship found.
     */
    public int isAncestor(IndexedWord child, IndexedWord ancestor) {

        Set<IndexedWord> parents = this.getParents(child);
        if (parents.contains(ancestor)) {
            return 1;
        for (IndexedWord parent : parents) {
            Set<IndexedWord> grandparents = this.getParents(parent);
            if (grandparents.contains(ancestor)) {
                return 2;
        return -1;

    /**
     * Returns the maximum distance to a least common ancestor. We only search as
     * high as grandparents. We return -1 if no common parent or grandparent is
     * found.
     *
     * @return The maximum distance to a least common ancestor.
     */
    public int commonAncestor(IndexedWord v1, IndexedWord v2) {
        if (v1.equals(v2)) {
            return 0;

        Set<IndexedWord> v1Parents = this.getParents(v1);
        Set<IndexedWord> v2Parents = this.getParents(v2);
        Set<IndexedWord> v1GrandParents = wordMapFactory.newSet();
        Set<IndexedWord> v2GrandParents = wordMapFactory.newSet();

        if (v1Parents.contains(v2) || v2Parents.contains(v1)) {
            return 1;

        // does v1 have any parents that are v2's parents?
        for (IndexedWord v1Parent : v1Parents) {
            if (v2Parents.contains(v1Parent)) {
                return 1;
        // build v2 grandparents
        for (IndexedWord v2Parent : v2Parents) {
        if (v1GrandParents.contains(v2) || v2GrandParents.contains(v1)) {
            return 2;
        // Are any of v1's parents a grandparent of v2?
        for (IndexedWord v2GrandParent : v2GrandParents) {
            if (v1Parents.contains(v2GrandParent)) {
                return 2;
        // Are any of v2's parents a grandparent of v1?
        for (IndexedWord v1GrandParent : v1GrandParents) {
            if (v2Parents.contains(v1GrandParent)) {
                return 2;
        for (IndexedWord v2GrandParent : v2GrandParents) {
            if (v1GrandParents.contains(v2GrandParent)) {
                return 2;
        return -1;

    /**
     * Returns the least common ancestor. We only search as high as grandparents.
     * We return null if no common parent or grandparent is found. Either of the
     * input words can also be the answer if one is the parent or grandparent of
     * the other, or if the input words are the same.
     *
     * @return The least common ancestor.
     */
    public IndexedWord getCommonAncestor(IndexedWord v1, IndexedWord v2) {
        if (v1.equals(v2)) {
            return v1;

        if (this.isAncestor(v1, v2) >= 1) {
            return v2;

        if (this.isAncestor(v2, v1) >= 1) {
            return v1;

        Set<IndexedWord> v1Parents = this.getParents(v1);
        Set<IndexedWord> v2Parents = this.getParents(v2);
        Set<IndexedWord> v1GrandParents = wordMapFactory.newSet();
        Set<IndexedWord> v2GrandParents = wordMapFactory.newSet();
        // does v1 have any parents that are v2's parents?

        for (IndexedWord v1Parent : v1Parents) {
            if (v2Parents.contains(v1Parent)) {
                return v1Parent;
        // does v1 have any grandparents that are v2's parents?
        for (IndexedWord v1GrandParent : v1GrandParents) {
            if (v2Parents.contains(v1GrandParent)) {
                return v1GrandParent;
        // build v2 grandparents
        for (IndexedWord v2Parent : v2Parents) {
        // does v1 have any parents or grandparents that are v2's grandparents?
        for (IndexedWord v2GrandParent : v2GrandParents) {
            if (v1Parents.contains(v2GrandParent)) {
                return v2GrandParent;
            if (v1GrandParents.contains(v2GrandParent)) {
                return v2GrandParent;
        return null;

    // todo [cdm 2013]: Completely RTE-specific methods like this one should be used to a static class of helper methods under RTE
    // If "det" is true, the search for a child is restricted to the "determiner"
    // grammatical relation.
    public boolean matchPatternToVertex(String pattern, IndexedWord vertex, boolean det) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        String pat = pattern.replaceAll("<", ",<");
        pat = pat.replaceAll(">", ",>");
        String[] nodePath = pat.split(",");
        for (String s : nodePath) {
            if (s.isEmpty()) {
            String word = s.substring(1);
            char dir = s.charAt(0);
            if (dir == '<') {
                // look for a matching parent
                boolean match = false;
                for (IndexedWord parent : getParents(vertex)) {
                    String lemma = parent.get(CoreAnnotations.LemmaAnnotation.class);
                    if (lemma.equals(word)) {
                        match = true;
                if (!match) {
                    return false;
            } else if (dir == '>') {
                if (det) {
                    // look for a matching child with "det" relation
                    Set<IndexedWord> children = wordMapFactory.newSet();
                    children.addAll(getChildrenWithReln(vertex, EnglishGrammaticalRelations.DETERMINER));
                    children.addAll(getChildrenWithReln(vertex, EnglishGrammaticalRelations.PREDETERMINER));
                    boolean match = false;
                    for (IndexedWord child : children) {
                        String lemma = child.get(CoreAnnotations.LemmaAnnotation.class);
                        if (lemma.isEmpty()) {
                            lemma = child.word().toLowerCase();
                        if (lemma.equals(word)) {
                            match = true;
                    if (!match) {
                        return false;
                } else {// take any relation, except "det"
                    List<Pair<GrammaticalRelation, IndexedWord>> children = childPairs(vertex);
                    boolean match = false;
                    for (Pair<GrammaticalRelation, IndexedWord> pair : children) {
                        if (pair.first().toString().equals("det"))
                        IndexedWord child = pair.second();
                        String lemma = child.get(CoreAnnotations.LemmaAnnotation.class);
                        if (lemma.isEmpty()) {
                            lemma = child.word().toLowerCase();
                        if (lemma.equals(word)) {
                            match = true;
                    if (!match) {
                        return false;
            } else {
                throw new RuntimeException("Warning: bad pattern \"%s\"\n" + pattern);
        return true;

    // todo [cdm 2013]: Completely RTE-specific methods like this one should be used to a static class of helper methods under RTE
    public boolean matchPatternToVertex(String pattern, IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        String pat = pattern.replaceAll("<", ",<");
        pat = pat.replaceAll(">", ",>");
        String[] nodePath = pat.split(",");
        for (String s : nodePath) {
            if (s.isEmpty()) {
            String word = s.substring(1);
            char dir = s.charAt(0);
            if (dir == '<') {
                // look for a matching parent
                boolean match = false;
                for (IndexedWord parent : getParents(vertex)) {
                    String lemma = parent.get(CoreAnnotations.LemmaAnnotation.class);
                    if (lemma.equals(word)) {
                        match = true;
                if (!match) {
                    return false;
            } else if (dir == '>') {
                // look for a matching child
                boolean match = false;
                for (IndexedWord child : getChildren(vertex)) {
                    String lemma = child.get(CoreAnnotations.LemmaAnnotation.class);
                    if (lemma == null || lemma.isEmpty()) {
                        lemma = child.word().toLowerCase();
                    if (lemma.equals(word)) {
                        match = true;
                if (!match) {
                    return false;
            } else {
                throw new RuntimeException("Warning: bad pattern \"%s\"\n" + pattern);
        return true;

    public List<IndexedWord> getChildList(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        List<IndexedWord> result = new ArrayList<>(getChildren(vertex));
        return result;

    public Set<IndexedWord> getChildren(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        return graph.getChildren(vertex);

    public boolean hasChildren(IndexedWord vertex) {
        return outgoingEdgeIterator(vertex).hasNext();

    public List<SemanticGraphEdge> getIncomingEdgesSorted(IndexedWord vertex) {
        List<SemanticGraphEdge> edges = incomingEdgeList(vertex);
        return edges;

    public List<SemanticGraphEdge> getOutEdgesSorted(IndexedWord vertex) {
        List<SemanticGraphEdge> edges = outgoingEdgeList(vertex);
        return edges;

    public List<IndexedWord> getParentList(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        List<IndexedWord> result = new ArrayList<>(getParents(vertex));
        return result;

    public Set<IndexedWord> getParents(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        return graph.getParents(vertex);

    /**
     * Method for getting the siblings of a particular node. Siblings are the
     * other children of your parent, where parent is determined as the parent
     * returned by getParent.
     *
     * @return collection of sibling nodes (does not include vertex);
     *         the collection is empty if your parent is null
     */
    public Collection<IndexedWord> getSiblings(IndexedWord vertex) {
        IndexedWord parent = this.getParent(vertex);
        if (parent != null) {
            Set<IndexedWord> result = wordMapFactory.newSet();
            result.remove(vertex);//remove this vertex - you're not your own sibling
            return result;
        } else {
            return Collections.emptySet();

    /**
     * Helper function for the public function with the same name.
     * <br>
     * Builds up the list backwards.
     */
    private List<IndexedWord> getPathToRoot(IndexedWord vertex, List<IndexedWord> used) {

        // TODO: Apparently the order of the nodes in the path to the root
        // makes a difference for the RTE system.  Look into this some more
        List<IndexedWord> parents = getParentList(vertex);
        // Set<IndexedWord> parents = wordMapFactory.newSet();
        // parents.addAll(getParents(vertex));

        if (roots.contains(vertex) || (parents.isEmpty())) {
            used.remove(used.size() - 1);
            if (roots.contains(vertex))
                return Generics.newArrayList();
                return null; // no path found

        for (IndexedWord parent : parents) {
            List<IndexedWord> path = getPathToRoot(parent, used);
            if (path != null) {
                used.remove(used.size() - 1);
                return path;

        used.remove(used.size() - 1);
        return null;


    /**
     * Finds the path from the given node to a root. The path does not include the
     * given node. Returns an empty list if vertex is a root. Returns null if a
     * root is inaccessible (should never happen).
     */
    public List<IndexedWord> getPathToRoot(IndexedWord vertex) {
        List<IndexedWord> path = getPathToRoot(vertex, Generics.newArrayList());
        if (path != null)
        return path;

    /**
     * Returns the real syntactic parent of vertex, i.e. the first node on its
     * path to a root, or null if there is no such node.
     */
    public IndexedWord getParent(IndexedWord vertex) {
        List<IndexedWord> path = getPathToRoot(vertex);

        if (path != null && path.size() > 0)
            return path.get(0);
            return null;

    /**
     * Returns the <em>first</em> {@link edu.stanford.nlp.ling.IndexedWord
     * IndexedWord} in this {@code SemanticGraph} having the given integer index,
     * or throws {@code IllegalArgumentException} if no such node is found.
     */
    public IndexedWord getNodeByIndex(int index) throws IllegalArgumentException {
        IndexedWord node = getNodeByIndexSafe(index);
        if (node == null)
            throw new IllegalArgumentException("No SemanticGraph vertex with index " + index);
            return node;

    /**
     * Same as {@link #getNodeByIndex(int)}, but returns {@code null} if the index
     * does not exist (instead of throwing an exception).
     */
    public IndexedWord getNodeByIndexSafe(int index) {
        for (IndexedWord vertex : vertexSet()) {
            if (vertex.index() == index) {
                return vertex;
        return null;

    /**
     * Returns the <em>first</em> {@link edu.stanford.nlp.ling.IndexedWord
     * IndexedWord} in this {@code SemanticGraph} having the given integer index
     * and copy count, or throws {@code IllegalArgumentException} if no such node
     * is found.
     */
    public IndexedWord getNodeByIndexAndCopyCount(int index, int copyCount) throws IllegalArgumentException {
        IndexedWord node = getNodeByIndexAndCopyCountSafe(index, copyCount);
        if (node == null)
            throw new IllegalArgumentException(
                    "No SemanticGraph vertex with index " + index + " and copyCount " + copyCount);
            return node;

    /**
     * Same as {@link #getNodeByIndexAndCopyCount(int, int)}, but returns
     * {@code null} if no node has the given index and copy count (instead of
     * throwing an exception).
     */
    public IndexedWord getNodeByIndexAndCopyCountSafe(int index, int copyCount) {
        for (IndexedWord vertex : vertexSet()) {
            if (vertex.index() == index && vertex.copyCount() == copyCount) {
                return vertex;
        return null;

    /**
     * Returns the <i>first</i> {@link edu.stanford.nlp.ling.IndexedWord
     * IndexedWord} in this {@code SemanticGraph} whose word matches the given
     * word or regex, or returns null if none is found.
     */
    public IndexedWord getNodeByWordPattern(String pattern) {
        Pattern p = Pattern.compile(pattern);
        for (IndexedWord vertex : vertexSet()) {
            String w = vertex.word();
            if ((w == null && pattern == null) || w != null && p.matcher(w).matches()) {
                return vertex;
        return null;

    /**
     * Returns all nodes of type {@link edu.stanford.nlp.ling.IndexedWord
     * IndexedWord} in this {@code SemanticGraph} whose word matches the given
     * word or regex, or returns an empty list if none is found.
     */
    public List<IndexedWord> getAllNodesByWordPattern(String pattern) {
        Pattern p = Pattern.compile(pattern);
        List<IndexedWord> nodes = new ArrayList<>();
        for (IndexedWord vertex : vertexSet()) {
            String w = vertex.word();
            if ((w == null && pattern == null) || w != null && p.matcher(w).matches()) {
        return nodes;

    public List<IndexedWord> getAllNodesByPartOfSpeechPattern(String pattern) {
        Pattern p = Pattern.compile(pattern);
        List<IndexedWord> nodes = new ArrayList<>();
        for (IndexedWord vertex : vertexSet()) {
            String pos = vertex.tag();
            if ((pos == null && pattern == null) || pos != null && p.matcher(pos).matches()) {
        return nodes;

    /**
     * Returns the set of descendants governed by this node in the graph.
     */
    public Set<IndexedWord> descendants(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        // Do a depth first search
        Set<IndexedWord> descendantSet = wordMapFactory.newSet();
        descendantsHelper(vertex, descendantSet);
        return descendantSet;

    private void descendantsHelper(IndexedWord curr, Set<IndexedWord> descendantSet) {
        if (descendantSet.contains(curr)) {
        for (IndexedWord child : getChildren(curr)) {
            descendantsHelper(child, descendantSet);

    /**
     * Returns a list of pairs of a relation and the child
     * IndexedWord that bears that relation.
     */
    public List<Pair<GrammaticalRelation, IndexedWord>> childPairs(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        List<Pair<GrammaticalRelation, IndexedWord>> childPairs = Generics.newArrayList();
        for (SemanticGraphEdge e : outgoingEdgeIterable(vertex)) {
            childPairs.add(new Pair<>(e.getRelation(), e.getTarget()));
        return childPairs;

    /**
     * Returns a list of pairs of a relation and the parent
     * IndexedWord to which we bear that relation.
     */
    public List<Pair<GrammaticalRelation, IndexedWord>> parentPairs(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        List<Pair<GrammaticalRelation, IndexedWord>> parentPairs = Generics.newArrayList();
        for (SemanticGraphEdge e : incomingEdgeIterable(vertex)) {
            parentPairs.add(new Pair<>(e.getRelation(), e.getSource()));
        return parentPairs;

    /**
     * Returns a set of relations which this node has with its parents.
     *
     * @return The set of relations which this node has with its parents.
     */
    public Set<GrammaticalRelation> relns(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        Set<GrammaticalRelation> relns = Generics.newHashSet();
        List<Pair<GrammaticalRelation, IndexedWord>> pairs = parentPairs(vertex);
        for (Pair<GrammaticalRelation, IndexedWord> p : pairs) {
        return relns;

    /**
     * Returns the relation that node a has with node b.
     * Note: there may be multiple arcs between {@code a} and
     * {@code b}, and this method only returns one relation.
     */
    public GrammaticalRelation reln(IndexedWord a, IndexedWord b) {
        if (!containsVertex(a)) {
            throw new IllegalArgumentException();

        List<Pair<GrammaticalRelation, IndexedWord>> pairs = childPairs(a);
        for (Pair<GrammaticalRelation, IndexedWord> p : pairs)
            if (p.second().equals(b))
                return p.first();

        return null;

    /**
     * Returns a set of relations which this node has with its children.
     */
    public Set<GrammaticalRelation> childRelns(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        Set<GrammaticalRelation> relns = Generics.newHashSet();
        List<Pair<GrammaticalRelation, IndexedWord>> pairs = childPairs(vertex);
        for (Pair<GrammaticalRelation, IndexedWord> p : pairs) {
        return relns;

    public Collection<IndexedWord> getRoots() {
        return roots;

    /**
     * Looks for nodes which have no incoming arcs. If there are any, it
     * returns a list of them. Otherwise, it returns an empty list.
     * (The fallback of looking for nodes from which every other node is
     * reachable is not implemented in this method.)
     *
     * @return A list of root nodes or an empty list.
     */
    private List<IndexedWord> getVerticesWithoutParents() {
        List<IndexedWord> result = new ArrayList<>();
        for (IndexedWord v : vertexSet()) {
            int inDegree = inDegree(v);
            if (inDegree == 0) {
        return result;

    /** Returns the (first) root of this SemanticGraph. */
    public IndexedWord getFirstRoot() {
        if (roots.isEmpty())
            throw new RuntimeException("No roots in graph:\n" + this
                    + "\nFind where this graph was created and make sure you're adding roots.");
        return roots.iterator().next();

    public void addRoot(IndexedWord root) {

    /**
     * This method should not be used if possible. TODO: delete it
     * <p>
     * Recomputes the roots, based on actual candidates. This is done to
     * ensure a rooted tree after a sequence of edits. If none of the vertices
     * can act as a root (due to a cycle), keep the old root set, retaining only
     * the existing vertices on that list.
     * <p>
     * TODO: this cannot deal with "Hamburg is a city which everyone likes", as
     * the intended root node, 'Hamburg', is also the dobj of the relative clause. A
     * possible solution would be to create edgeset routines that allow filtering
     * over a predicate, and specifically filter out dobj relations for choosing
     * the next best candidate. This could also be useful for dealing with
     * non-syntactic arcs in the future. TODO: There is also the possibility the
     * roots could be empty at the end, and will need to be resolved. TODO:
     * determine if this is a reasonably correct solution.
     */
    public void resetRoots() {
        Collection<IndexedWord> newRoots = getVerticesWithoutParents();
        if (newRoots.size() > 0) {

         * else { Collection<IndexedWord> oldRoots = new
         * ArrayList<IndexedWord>(roots); for (IndexedWord oldRoot : oldRoots) { if
         * (!containsVertex(oldRoot)) removeVertex(oldRoot); } }

        // If no apparent root candidates are available, likely due to loop back
        // edges (rcmod), find the node that dominates the most nodes, and let
        // that be the new root. Note this implementation epitomizes K.I.S.S., and
        // is brain dead and non-optimal, and will require further work.
        TwoDimensionalCounter<IndexedWord, IndexedWord> nodeDists = TwoDimensionalCounter.identityHashMapCounter();
        for (IndexedWord node1 : vertexSet()) {
            for (IndexedWord node2 : vertexSet()) {
                // want directed paths only
                List<SemanticGraphEdge> path = getShortestDirectedPathEdges(node1, node2);
                if (path != null) {
                    int dist = path.size();
                    nodeDists.setCount(node1, node2, dist);

        // K.I.S.S. alg: just sum up and see who's on top, values don't have much
        // meaning outside of determining dominance.
        ClassicCounter<IndexedWord> dominatedEdgeCount = ClassicCounter.identityHashMapCounter();
        for (IndexedWord outer : vertexSet()) {
            for (IndexedWord inner : vertexSet()) {
                dominatedEdgeCount.incrementCount(outer, nodeDists.getCount(outer, inner));

        IndexedWord winner = Counters.argmax(dominatedEdgeCount);
        // TODO: account for multiply rooted graphs later


    public void setRoot(IndexedWord word) {

    public void setRoots(Collection<IndexedWord> words) {

    /**
     * Returns the vertices of this graph in topological order.
     *
     * @return A sorted list of the vertices
     * @throws IllegalStateException if this graph is not a DAG
     */
    public List<IndexedWord> topologicalSort() {
        return graph.topologicalSort();

    /**
     * Does the given {@code vertex} have at least one child with the given {@code reln} and the lemma {@code childLemma}?
     */
    public boolean hasChild(IndexedWord vertex, GrammaticalRelation reln, String childLemma) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
            if (edge.getRelation().equals(reln)) {
                if (edge.getTarget().get(CoreAnnotations.LemmaAnnotation.class).equals(childLemma)) {
                    return true;
        return false;

    /**
     * Does the given {@code vertex} have at least one child with the given {@code reln}?
     */
    public boolean hasChildWithReln(IndexedWord vertex, GrammaticalRelation reln) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
            if (edge.getRelation().equals(reln)) {
                return true;
        return false;

    /**
     * Returns true if vertex has an incoming relation reln.
     *
     * @param vertex A node in this graph
     * @param reln The relation we want to check
     * @return true if vertex has an incoming relation reln
     */
    public boolean hasParentWithReln(IndexedWord vertex, GrammaticalRelation reln) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        for (SemanticGraphEdge edge : incomingEdgeIterable(vertex)) {
            if (edge.getRelation().equals(reln)) {
                return true;
        return false;

    /**
     * Returns the first child IndexedWord bearing a certain grammatical
     * relation, or null if none.
     */
    public IndexedWord getChildWithReln(IndexedWord vertex, GrammaticalRelation reln) {
        if (vertex.equals(IndexedWord.NO_WORD))
            return null;
        if (!containsVertex(vertex))
            throw new IllegalArgumentException();

        for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
            if (edge.getRelation().equals(reln)) {
                return edge.getTarget();
        return null;

    /**
     * Returns a set of all parents bearing a certain grammatical relation, or an
     * empty set if none.
     */
    public Set<IndexedWord> getParentsWithReln(IndexedWord vertex, GrammaticalRelation reln) {
        if (vertex.equals(IndexedWord.NO_WORD))
            return Collections.emptySet();
        if (!containsVertex(vertex))
            throw new IllegalArgumentException();

        Set<IndexedWord> parentList = wordMapFactory.newSet();
        for (SemanticGraphEdge edge : incomingEdgeIterable(vertex)) {
            if (edge.getRelation().equals(reln)) {
        return parentList;

     * Returns a set of all parents bearing a certain grammatical relation, or an
     * empty set if none.
    public Set<IndexedWord> getParentsWithReln(IndexedWord vertex, String relnName) {
        if (vertex.equals(IndexedWord.NO_WORD))
            return Collections.emptySet();
        if (!containsVertex(vertex))
            throw new IllegalArgumentException();

        Set<IndexedWord> parentList = wordMapFactory.newSet();
        for (SemanticGraphEdge edge : incomingEdgeIterable(vertex)) {
            if (edge.getRelation().toString().equals(relnName)) {
        return parentList;

     * Returns a set of all children bearing a certain grammatical relation, or
     * an empty set if none.
    public Set<IndexedWord> getChildrenWithReln(IndexedWord vertex, GrammaticalRelation reln) {
        if (vertex.equals(IndexedWord.NO_WORD))
            return Collections.emptySet();
        if (!containsVertex(vertex))
            throw new IllegalArgumentException();

        Set<IndexedWord> childList = wordMapFactory.newSet();
        for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
            if (edge.getRelation().equals(reln)) {
        return childList;

     * Returns a set of all children bearing one of a set of grammatical
     * relations, or an empty set if none.
     * NOTE: this will only work for relation types that are classes. Those that
     * are collapsed are currently not handled correctly since they are identified
     * by strings.
    public Set<IndexedWord> getChildrenWithRelns(IndexedWord vertex, Collection<GrammaticalRelation> relns) {
        if (vertex.equals(IndexedWord.NO_WORD))
            return Collections.emptySet();
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        Set<IndexedWord> childList = wordMapFactory.newSet();
        for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
            if (relns.contains(edge.getRelation())) {
        return childList;

     * Given a governor, dependent, and the relation between them, returns the
     * SemanticGraphEdge object of that arc if it exists, otherwise returns null.
    public SemanticGraphEdge getEdge(IndexedWord gov, IndexedWord dep, GrammaticalRelation reln) {
        Collection<SemanticGraphEdge> edges = getAllEdges(gov, dep);
        if (edges != null) {
            for (SemanticGraphEdge edge : edges) {
                if (!edge.getSource().equals(gov))
                if ((edge.getRelation().equals(reln))) {
                    return edge;
        return null;

    public boolean isNegatedVertex(IndexedWord vertex) {
        if (vertex == IndexedWord.NO_WORD) {
            return false;
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException("Vertex " + vertex + " not in graph " + this);

        return (hasChildWithReln(vertex, EnglishGrammaticalRelations.NEGATION_MODIFIER)
                || hasChild(vertex, GrammaticalRelation.DEPENDENT, "nor"));

    private boolean isNegatedVerb(IndexedWord vertex) {
        if (!containsVertex(vertex)) {
            throw new IllegalArgumentException();
        return (vertex.tag().startsWith("VB") && isNegatedVertex(vertex));

     * Check if the vertex is in a "conditional" context. Right now it's only
     * returning true if vertex has an "if" marker attached to it, i.e. the vertex
     * is in a clause headed by "if".
    public boolean isInConditionalContext(IndexedWord vertex) {
        for (IndexedWord child : getChildrenWithReln(vertex, EnglishGrammaticalRelations.MARKER)) {
            if (child.word().equalsIgnoreCase("if")) {
                return true;
        return false;

    // Obsolete; use functions in rte.feat.NegPolarityFeaturizers instead

    public boolean attachedNegatedVerb(IndexedWord vertex) {
        for (IndexedWord parent : getParents(vertex)) {
            if (isNegatedVerb(parent)) {
                return true;
        return false;

    /** Returns true iff this vertex stands in the "aux" relation to (any of)
     *  its parent(s).
    public boolean isAuxiliaryVerb(IndexedWord vertex) {
        Set<GrammaticalRelation> relns = relns(vertex);
        if (relns.isEmpty())
            return false;

        boolean result = relns.contains(EnglishGrammaticalRelations.AUX_MODIFIER)
                || relns.contains(EnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER);
        //"I say " + vertex + (result ? " is" : " is not") +
        // " an aux");
        return result;

    public Set<IndexedWord> getLeafVertices() {
        Set<IndexedWord> result = wordMapFactory.newSet();
        for (IndexedWord v : vertexSet()) {
            if (outDegree(v) == 0) {
        return result;

     * Returns the number of nodes in the graph
    public int size() {
        return this.vertexSet().size();

     * Returns all nodes reachable from {@code root}.
     * @param root the root node of the subgraph
     * @return all nodes in subgraph
    public Set<IndexedWord> getSubgraphVertices(IndexedWord root) {
        Set<IndexedWord> result = wordMapFactory.newSet();
        List<IndexedWord> queue = Generics.newLinkedList();
        while (!queue.isEmpty()) {
            IndexedWord current = queue.remove(0);
            for (IndexedWord child : this.getChildren(current)) {
                if (!result.contains(child)) {
        return result;

     * @return true if the graph contains no cycles.
    public boolean isDag() {
        Set<IndexedWord> unused = wordMapFactory.newSet();
        while (!unused.isEmpty()) {
            IndexedWord arbitrary = unused.iterator().next();
            boolean result = isDagHelper(arbitrary, unused, wordMapFactory.newSet());
            if (result) {
                return false;
        return true;

     * @param root root node of the subgraph.
     * @return true if the subgraph rooted at {@code root} contains no cycles.

    public boolean isDag(IndexedWord root) {
        Set<IndexedWord> unused = wordMapFactory.newSet();
        while (!unused.isEmpty()) {
            IndexedWord arbitrary = unused.iterator().next();
            boolean result = isDagHelper(arbitrary, unused, wordMapFactory.newSet());
            if (result) {
                return false;
        return true;

    private boolean isDagHelper(IndexedWord current, Set<IndexedWord> unused, Set<IndexedWord> trail) {
        if (trail.contains(current)) {
            return true;
        } else if (!unused.contains(current)) {
            return false;
        for (IndexedWord child : getChildren(current)) {
            boolean result = isDagHelper(child, unused, trail);
            if (result) {
                return true;

        return false;

    // ============================================================================
    // String display
    // ============================================================================

     * Recursive depth first traversal. Returns a structured representation of the
     * dependency graph.
     * Example:
     * <pre>
     *  -> need-3 (root)
     *    -> We-0 (nsubj)
     *    -> do-1 (aux)
     *    -> n't-2 (neg)
     *    -> badges-6 (dobj)
     *      -> no-4 (det)
     *      -> stinking-5 (amod)
     * </pre>
     * This is a quite ugly way to print a SemanticGraph.
     * You might instead want to try {@link #toString(OutputFormat)}.
    public String toString() {
        return toString(CoreLabel.OutputFormat.VALUE_TAG);

    public String toString(CoreLabel.OutputFormat wordFormat) {
        Collection<IndexedWord> rootNodes = getRoots();
        if (rootNodes.isEmpty()) {
            // Shouldn't happen, but return something!
            return toString(OutputFormat.READABLE);

        StringBuilder sb = new StringBuilder();
        Set<IndexedWord> used = wordMapFactory.newSet();
        for (IndexedWord root : rootNodes) {
            sb.append("-> ").append(root.toString(wordFormat)).append(" (root)\n");
            recToString(root, wordFormat, sb, 1, used);
        Set<IndexedWord> nodes = wordMapFactory.newSet();
        while (!nodes.isEmpty()) {
            IndexedWord node = nodes.iterator().next();
            recToString(node, wordFormat, sb, 1, used);
        return sb.toString();

    // helper for toString()
    private void recToString(IndexedWord curr, CoreLabel.OutputFormat wordFormat, StringBuilder sb, int offset,
            Set<IndexedWord> used) {
        List<SemanticGraphEdge> edges = outgoingEdgeList(curr);
        for (SemanticGraphEdge edge : edges) {
            IndexedWord target = edge.getTarget();
            sb.append(space(2 * offset)).append("-> ").append(target.toString(wordFormat)).append(" (")
            if (!used.contains(target)) { // recurse
                recToString(target, wordFormat, sb, offset + 1, used);

    private static String space(int width) {
        StringBuilder b = new StringBuilder();
        for (int i = 0; i < width; i++) {
            b.append(' ');
        return b.toString();

    public String toRecoveredSentenceString() {
        StringBuilder sb = new StringBuilder();
        boolean pastFirst = false;
        for (IndexedWord word : vertexListSorted()) {
            if (pastFirst) {
                sb.append(' ');
            pastFirst = true;
        return sb.toString();

    public String toRecoveredSentenceStringWithIndexMarking() {
        StringBuilder sb = new StringBuilder();
        boolean pastFirst = false;
        int index = 0;
        for (IndexedWord word : vertexListSorted()) {
            if (pastFirst) {
                sb.append(' ');
            pastFirst = true;
        return sb.toString();

     * Similar to {@code toRecoveredString}, but will fill in words that were
     * collapsed into relations (i.e. prep_for --> 'for'). Mostly to deal with
     * collapsed dependency trees.
     * TODO: consider merging with toRecoveredString() NOTE: assumptions currently
     * are for English. NOTE: currently takes immediate successors to current word
     * and expands them. This assumption may not be valid for other conditions or
     * languages?
    public String toEnUncollapsedSentenceString() {
        List<IndexedWord> uncompressedList = Generics.newLinkedList(vertexSet());
        List<Pair<String, IndexedWord>> specifics = Generics.newArrayList();

        // Collect the specific relations and the governed nodes, and then process
        // them one by one,
        // to avoid concurrent modification exceptions.
        for (IndexedWord word : vertexSet()) {
            for (SemanticGraphEdge edge : getIncomingEdgesSorted(word)) {
                GrammaticalRelation relation = edge.getRelation();
                // Extract the specific: need to account for possibility that relation
                // can
                // be a String or GrammaticalRelation (how did it happen this way?)
                String specific = relation.getSpecific();

                if (specific == null) {
                    if (edge.getRelation().equals(EnglishGrammaticalRelations.AGENT)) {
                        specific = "by";

                // Insert the specific at the leftmost token that is not governed by
                // this node.
                if (specific != null) {
                    Pair<String, IndexedWord> specPair = new Pair<>(specific, word);

        for (Pair<String, IndexedWord> tuple : specifics) {
            insertSpecificIntoList(tuple.first(), tuple.second(), uncompressedList);

        return StringUtils.join(uncompressedList, " ");

     * Inserts the given specific portion of an uncollapsed relation back into the
     * targetList
     * @param specific Specific relation to put in.
     * @param relnTgtNode Node governed by the uncollapsed relation
     * @param tgtList Target List of words
    private void insertSpecificIntoList(String specific, IndexedWord relnTgtNode, List<IndexedWord> tgtList) {
        int currIndex = tgtList.indexOf(relnTgtNode);
        Set<IndexedWord> descendants = descendants(relnTgtNode);
        IndexedWord specificNode = new IndexedWord();
        specificNode.set(CoreAnnotations.LemmaAnnotation.class, specific);
        specificNode.set(CoreAnnotations.TextAnnotation.class, specific);
        specificNode.set(CoreAnnotations.OriginalTextAnnotation.class, specific);
        while ((currIndex >= 1) && descendants.contains(tgtList.get(currIndex - 1))) {
        tgtList.add(currIndex, specificNode);

    /** The output styles understood by {@link #toString(OutputFormat)}. */
    public enum OutputFormat {
        LIST, XML, READABLE, RECURSIVE
    }

     * Returns a String representation of the result of this set of typed
     * dependencies in a user-specified format. Currently, four formats are
     * supported ({@link OutputFormat}):
     * <dl>
     * <dt>list</dt>
     * <dd>(Default.) Formats the dependencies as logical relations, as
     * exemplified by the following:
     * <pre>
     *  nsubj(died-1, Sam-0)
     *  tmod(died-1, today-2)
     * </pre>
     * </dd>
     * <dt>readable</dt>
     * <dd>Formats the dependencies as a table with columns {@code dependent}, {@code relation}, and {@code governor},
     * as exemplified by the following:
     * <pre>
     *  Sam-0               nsubj               died-1
     *  today-2             tmod                died-1
     * </pre>
     * </dd>
     * <dt>xml</dt>
     * <dd>Formats the dependencies as XML, as exemplified by the following:
     * <pre>
     *  &lt;dependencies&gt;
     *    &lt;dep type="nsubj"&gt;
     *      &lt;governor idx="1"&gt;died&lt;/governor&gt;
     *      &lt;dependent idx="0"&gt;Sam&lt;/dependent&gt;
     *    &lt;/dep&gt;
     *    &lt;dep type="tmod"&gt;
     *      &lt;governor idx="1"&gt;died&lt;/governor&gt;
     *      &lt;dependent idx="2"&gt;today&lt;/dependent&gt;
     *    &lt;/dep&gt;
     *  &lt;/dependencies&gt;
     * </pre>
     * </dd>
     * <dt>recursive</dt>
     * <dd>
     * The default output for {@link #toString()}
     * </dd>
     * </dl>
     * @param format A {@code String} specifying the desired format
     * @return A {@code String} representation of the typed dependencies in
     *         this {@code GrammaticalStructure}
    public String toString(OutputFormat format) {
        switch (format) {
        case XML:
            return toXMLString();
        case READABLE:
            return toReadableString();
        case LIST:
            return toList();
        case RECURSIVE:
            return toString();
            throw new IllegalArgumentException("Unsupported format " + format);

     * Returns a String representation of this graph as a list of typed
     * dependencies, as exemplified by the following:
     * <pre>
     *  nsubj(died-6, Sam-3)
     *  tmod(died-6, today-9)
     * </pre>
     * @return a {@code String} representation of this set of typed dependencies
    public String toList() {
        StringBuilder buf = new StringBuilder();
        for (IndexedWord root : getRoots()) {
            buf.append("root(ROOT-0, ");
        for (SemanticGraphEdge edge : this.edgeListSorted()) {
            buf.append(edge.getSource().toString(CoreLabel.OutputFormat.VALUE_INDEX)).append(", ");
        return buf.toString();

     * Similar to toList(), but uses POS tags instead of word and index.
    public String toPOSList() {
        StringBuilder buf = new StringBuilder();
        for (SemanticGraphEdge edge : this.edgeListSorted()) {
        return buf.toString();

    private String toReadableString() {
        StringBuilder buf = new StringBuilder();
        buf.append(String.format("%-20s%-20s%-20s%n", "dep", "reln", "gov"));
        buf.append(String.format("%-20s%-20s%-20s%n", "---", "----", "---"));
        for (IndexedWord root : getRoots()) {
            buf.append(String.format("%-20s%-20s%-20s%n", root.toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX),
                    "root", "root"));
        for (SemanticGraphEdge edge : this.edgeListSorted()) {
        return buf.toString();

    private String toXMLString() {
        StringBuilder buf = new StringBuilder("<dependencies style=\"typed\">\n");
        for (SemanticGraphEdge edge : this.edgeListSorted()) {
            String reln = edge.getRelation().toString();
            String gov = (edge.getSource()).word();
            int govIdx = (edge.getSource()).index();
            String dep = (edge.getTarget()).word();
            int depIdx = (edge.getTarget()).index();
            buf.append("  <dep type=\"").append(reln).append("\">\n");
            buf.append("    <governor idx=\"").append(govIdx).append("\">").append(gov).append("</governor>\n");
            buf.append("    <dependent idx=\"").append(depIdx).append("\">").append(dep).append("</dependent>\n");
            buf.append("  </dep>\n");
        return buf.toString();

    public String toCompactString() {
        return toCompactString(false);

    public String toCompactString(boolean showTags) {
        StringBuilder sb = new StringBuilder();
        Set<IndexedWord> used = wordMapFactory.newSet();
        Collection<IndexedWord> roots = getRoots();
        if (roots.isEmpty()) {
            if (size() == 0) {
                return "[EMPTY_SEMANTIC_GRAPH]";
            } else {
                return "[UNROOTED_SEMANTIC_GRAPH]";
            // return toString("readable");
        for (IndexedWord root : roots) {
            toCompactStringHelper(root, sb, used, showTags);
        return sb.toString();

    private void toCompactStringHelper(IndexedWord node, StringBuilder sb, Set<IndexedWord> used,
            boolean showTags) {
        try {
            boolean isntLeaf = (outDegree(node) > 0);
            if (isntLeaf) {
            if (showTags) {
            for (SemanticGraphEdge edge : getOutEdgesSorted(node)) {
                IndexedWord target = edge.getTarget();
                sb.append(" ").append(edge.getRelation()).append(">");
                if (!used.contains(target)) { // avoid infinite loop
                    toCompactStringHelper(target, sb, used, showTags);
                } else {
                    if (showTags) {
            if (isntLeaf) {
        } catch (IllegalArgumentException e) {
  "WHOA!  SemanticGraph.toCompactStringHelper() ran into problems at node " + node);
            throw new IllegalArgumentException(e);

     * Returns a {@code String} representation of this semantic graph,
     * formatted by the default semantic graph formatter.
    public String toFormattedString() {
        return formatter.formatSemanticGraph(this);

     * Returns a {@code String} representation of this semantic graph,
     * formatted by the supplied semantic graph formatter.
    public String toFormattedString(SemanticGraphFormatter formatter) {
        return formatter.formatSemanticGraph(this);

     * Pretty-prints this semantic graph to {@code System.out}, formatted by
     * the supplied semantic graph formatter.
    public void prettyPrint(SemanticGraphFormatter formatter) {

     * Pretty-prints this semantic graph to {@code System.out}, formatted by
     * the default semantic graph formatter.
    public void prettyPrint() {

     * Returns an unnamed dot format digraph.
     * Nodes will be labeled with the word and edges will be labeled
     * with the dependency.
    public String toDotFormat() {
        return toDotFormat("");

     * Returns a dot format digraph with the given name.
     * Nodes will be labeled with the word and edges will be labeled
     * with the dependency.
    public String toDotFormat(String graphname) {
        return toDotFormat(graphname, CoreLabel.OutputFormat.VALUE_TAG_INDEX);

    public String toDotFormat(String graphname, CoreLabel.OutputFormat indexedWordFormat) {
        StringBuilder output = new StringBuilder();
        output.append("digraph " + graphname + " {\n");
        for (IndexedWord word : graph.getAllVertices()) {
            output.append("  N_" + word.index() + " [label=\"" + word.toString(indexedWordFormat) + "\"];\n");
        for (SemanticGraphEdge edge : graph.edgeIterable()) {
            output.append("  N_" + edge.getSource().index() + " -> N_" + edge.getTarget().index() + " [label=\""
                    + edge.getRelation() + "\"];\n");
        return output.toString();

    public SemanticGraphEdge addEdge(IndexedWord s, IndexedWord d, GrammaticalRelation reln, double weight,
            boolean isExtra) {
        SemanticGraphEdge newEdge = new SemanticGraphEdge(s, d, reln, weight, isExtra);
        graph.add(s, d, newEdge);
        return newEdge;

    public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
        SemanticGraphEdge newEdge = new SemanticGraphEdge(edge.getGovernor(), edge.getDependent(),
                edge.getRelation(), edge.getWeight(), edge.isExtra());
        graph.add(edge.getGovernor(), edge.getDependent(), newEdge);
        return newEdge;

    // =======================================================================

     * Tries to parse a String representing a SemanticGraph. Right now it's fairly
     * dumb, could be made more sophisticated.
     * <br>
     * Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]"
     * <br>
     * This is the same format generated by toCompactString().
    public static SemanticGraph valueOf(String s, Language language) {
        return (new SemanticGraphParsingTask(s, language)).parse();

     * @see SemanticGraph#valueOf(String, Language)
    public static SemanticGraph valueOf(String s) {
        return valueOf(s, Language.UniversalEnglish);

    /** Creates an empty graph with no vertices, edges or roots. */
    public SemanticGraph() {
        graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);
        roots = wordMapFactory.newSet();
    }

     /**
      * Returns a new SemanticGraph which is a copy of the supplied SemanticGraph.
      * Both the nodes ({@link IndexedWord}s) and the edges (SemanticGraphEdges)
      * are copied.
      *
      * @param g the graph to copy
      */
    public SemanticGraph(SemanticGraph g) {
        graph = new DirectedMultiGraph<>(g.graph);
        roots = wordMapFactory.newSet(g.roots);
    }

     /**
      * Copies the given graph, and also records the mapping from the old to the new
      * vertices.
      *
      * @param g the graph to copy
      * @param prevToNewMap filled with one entry per vertex of {@code g}, mapping it to
      *        its copy; may be null, in which case a private map is used
      */
    public SemanticGraph(SemanticGraph g, Map<IndexedWord, IndexedWord> prevToNewMap) {
        graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);
        if (prevToNewMap == null) {
            prevToNewMap = wordMapFactory.newMap();
        }
        Set<IndexedWord> vertexes = g.vertexSet();
        for (IndexedWord vertex : vertexes) {
            IndexedWord newVertex = new IndexedWord(vertex);
            // NOTE(review): copy count is carried over explicitly on the assumption that
            // the IndexedWord copy constructor does not preserve it - confirm.
            newVertex.setCopyCount(vertex.copyCount());
            // Add explicitly so that vertices without any edge are copied too.
            addVertex(newVertex);
            prevToNewMap.put(vertex, newVertex);
        }

        roots = wordMapFactory.newSet();
        for (IndexedWord oldRoot : g.getRoots()) {
            roots.add(prevToNewMap.get(oldRoot));
        }
        for (SemanticGraphEdge edge : g.edgeIterable()) {
            IndexedWord newGov = prevToNewMap.get(edge.getGovernor());
            IndexedWord newDep = prevToNewMap.get(edge.getDependent());
            addEdge(newGov, newDep, edge.getRelation(), edge.getWeight(), edge.isExtra());
        }
    }

     /**
      * This is the constructor used by the parser. Every dependency becomes an
      * edge, except those with the root relation, whose dependent is recorded as
      * a root of the graph.
      *
      * @param dependencies the typed dependencies to build the graph from
      */
    public SemanticGraph(Collection<TypedDependency> dependencies) {
        graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);
        roots = wordMapFactory.newSet();

        for (TypedDependency d : dependencies) {
            IndexedWord gov = d.gov();
            IndexedWord dep = d.dep();
            GrammaticalRelation reln = d.reln();

            if (reln != ROOT) { // the root relation only points to the root: the governor is a fake node that we don't want to add in the graph
                // It is unnecessary to call addVertex, since addEdge will
                // implicitly add vertices if needed
                addEdge(gov, dep, reln, Double.NEGATIVE_INFINITY, d.extra());
            } else { //it's the root and we add it
                addVertex(dep);
                roots.add(dep);
            }
        }

        // there used to be an if clause that filtered out the case of empty
        // dependencies. However, I could not understand (or replicate) the error
        // it alluded to, and it led to empty dependency graphs for very short
        // fragments,
        // which meant they were ignored by the RTE system. Changed. (pado)
        // See also SemanticGraphFactory.makeGraphFromTree().
    }

     * Returns the nodes in the shortest undirected path between two edges in the
     * graph. if source == target, returns a singleton list
     * @param source
     *          node
     * @param target
     *          node
     * @return nodes along shortest undirected path from source to target, in
     *         order
    public List<IndexedWord> getShortestUndirectedPathNodes(IndexedWord source, IndexedWord target) {
        return graph.getShortestPath(source, target, false);

    public List<SemanticGraphEdge> getShortestUndirectedPathEdges(IndexedWord source, IndexedWord target) {
        return graph.getShortestPathEdges(source, target, false);

     * Returns the shortest directed path between two edges in the graph.
     * @param source node
     * @param target node
     * @return shortest directed path from source to target
    public List<IndexedWord> getShortestDirectedPathNodes(IndexedWord source, IndexedWord target) {
        return graph.getShortestPath(source, target, true);

    public List<SemanticGraphEdge> getShortestDirectedPathEdges(IndexedWord source, IndexedWord target) {
        return graph.getShortestPathEdges(source, target, true);

    public SemanticGraph makeSoftCopy() {
        SemanticGraph newSg = new SemanticGraph();
        if (!this.roots.isEmpty())
        for (SemanticGraphEdge edge : this.edgeIterable()) {
            newSg.addEdge(edge.getSource(), edge.getTarget(), edge.getRelation(), edge.getWeight(), edge.isExtra());
        return newSg;

    // ============================================================================

    // Matches a "word-index" token: group 1 is one or more non-hyphen characters,
    // group 2 is the run of decimal digits after the single hyphen.
    private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)");

     * This nested class is a helper for valueOf(). It represents the task of
     * parsing a specific String representing a SemanticGraph.
    private static class SemanticGraphParsingTask extends StringParsingTask<SemanticGraph> {

        private SemanticGraph sg;
        private Set<Integer> indexesUsed = Generics.newHashSet();
        private Language language;

        public SemanticGraphParsingTask(String s) {
            this(s, Language.UniversalEnglish);

        public SemanticGraphParsingTask(String s, Language language) {
            this.language = language;

        public SemanticGraph parse() {
            sg = new SemanticGraph();
            try {
                if (!isLeftBracket(peek()))
                    return null;
                readDep(null, null);
                return sg;
            } catch (ParserException e) {
      "SemanticGraphParser warning: " + e.getMessage());
                return null;

        private void readDep(IndexedWord gov, String reln) {
            if (!isLeftBracket(peek())) { // it's a leaf
                String label = readName();
                IndexedWord dep = makeVertex(label);
                if (gov == null)
                sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY,
            } else {
                String label = readName();
                IndexedWord dep = makeVertex(label);
                if (gov == null)
                if (gov != null && reln != null) {
                    sg.addEdge(gov, dep, GrammaticalRelation.valueOf(this.language, reln), Double.NEGATIVE_INFINITY,
                while (!isRightBracket(peek()) && !isEOF) {
                    reln = readName();
                    readDep(dep, reln);

        private IndexedWord makeVertex(String word) {
            Integer index; // initialized below
            Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
            if (wordAndIndex != null) {
                word = wordAndIndex.first();
                index = wordAndIndex.second();
            } else {
                index = getNextFreeIndex();
            // Note that, despite the use of indexesUsed and getNextFreeIndex(),
            // nothing is actually enforcing that no indexes are used twice. This
            // could occur if some words in the string representation being parsed
            // come with index markers and some do not.
            IndexedWord ifl = new IndexedWord(null, 0, index);
            //"SemanticGraphParsingTask>>> word = " + word);
            //"SemanticGraphParsingTask>>> index = " + index);
            //"SemanticGraphParsingTask>>> indexesUsed = " +
            // indexesUsed);
            String[] wordAndTag = word.split("/");
            ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]);
            ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]);
            if (wordAndTag.length > 1)
                ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]);
            return ifl;

        private static Pair<String, Integer> readWordAndIndex(String word) {
            Matcher matcher = WORD_AND_INDEX_PATTERN.matcher(word);
            if (!matcher.matches()) {
                return null;
            } else {
                word =;
                Integer index = Integer.valueOf(;
                return new Pair<>(word, index);

        private Integer getNextFreeIndex() {
            int i = 0;
            while (indexesUsed.contains(i))
            return i;

        private void readLeftBracket() {
            // System.out.println("Read left.");
            char ch = read();
            if (!isLeftBracket(ch))
                throw new ParserException("Expected left paren!");

        private void readRightBracket() {
            // System.out.println("Read right.");
            char ch = read();
            if (!isRightBracket(ch))
                throw new ParserException("Expected right paren!");

        private void readRelnSeparator() {
            if (isRelnSeparator(peek()))

        private static boolean isLeftBracket(char ch) {
            return ch == '[';

        private static boolean isRightBracket(char ch) {
            return ch == ']';

        private static boolean isRelnSeparator(char ch) {
            return ch == '>';

        protected boolean isPunct(char ch) {
            return isLeftBracket(ch) || isRightBracket(ch) || isRelnSeparator(ch);

    } // end SemanticGraphParsingTask

    // =======================================================================

    public boolean equals(Object o) {
        if (o == this) {
            return true;
        if (!(o instanceof SemanticGraph)) {
            return false;
        SemanticGraph g = (SemanticGraph) o;
        return graph.equals(g.graph) && roots.equals(g.roots);

    public int hashCode() {
        return graph.hashCode();

     * Given a semantic graph, and a target relation, returns a list of all
     * relations (edges) matching.
    public List<SemanticGraphEdge> findAllRelns(GrammaticalRelation tgtRelation) {
        ArrayList<SemanticGraphEdge> relns = new ArrayList<>();
        for (SemanticGraphEdge edge : edgeIterable()) {
            GrammaticalRelation edgeRelation = edge.getRelation();
            if ((edgeRelation != null) && (edgeRelation.equals(tgtRelation))) {
        return relns;

     * Given a semantic graph, and the short name of a target relation, returns a list of all
     * relations (edges) matching.
    public List<SemanticGraphEdge> findAllRelns(String tgtRelationShortname) {
        ArrayList<SemanticGraphEdge> relns = new ArrayList<>();
        for (SemanticGraphEdge edge : edgeIterable()) {
            GrammaticalRelation edgeRelation = edge.getRelation();
            if ((edgeRelation != null) && (edgeRelation.getShortName().equals(tgtRelationShortname))) {
        return relns;

     * Delete all duplicate edges.
    public void deleteDuplicateEdges() {

    /** Returns a list of TypedDependency in the graph.
     *  This method goes through all SemanticGraphEdge and converts them
     *  to TypedDependency.
     *
     *  @return A List of TypedDependency in the graph
     */
    public Collection<TypedDependency> typedDependencies() {
        Collection<TypedDependency> dependencies = new ArrayList<>();
        IndexedWord root = null;
        for (IndexedWord node : roots) {
            if (root == null) {
                root = new IndexedWord(node.docID(), node.sentIndex(), 0);
            TypedDependency dependency = new TypedDependency(ROOT, root, node);
        for (SemanticGraphEdge e : this.edgeIterable()) {
            TypedDependency dependency = new TypedDependency(e.getRelation(), e.getGovernor(), e.getDependent());
            if (e.isExtra()) {
        return dependencies;

    /**
     * Returns the span of the subtree yield of this node. That is, the span of all the nodes under it.
     * In the case of projective graphs, the words in this span are also the yield of the constituent rooted
     * at this node.
     *
     * @param word The word acting as the root of the constituent we are finding.
     * @return A span, represented as a pair of integers. The span is zero indexed. The begin is inclusive and the end is exclusive.
     */
    public Pair<Integer, Integer> yieldSpan(IndexedWord word) {
        int min = Integer.MAX_VALUE;
        int max = Integer.MIN_VALUE;
        Stack<IndexedWord> fringe = new Stack<>();
        while (!fringe.isEmpty()) {
            IndexedWord parent = fringe.pop();
            min = Math.min(min, parent.index() - 1);
            max = Math.max(max, parent.index());
            for (SemanticGraphEdge edge : outgoingEdgeIterable(parent)) {
                if (!edge.isExtra()) {
        return Pair.makePair(min, max);

    /**
     * Returns the yield of a node, i.e., all descendants of the node.
     *
     * @param word The word acting as the root of the constituent we are finding.
     */
    public List<IndexedWord> yield(IndexedWord word) {
        List<IndexedWord> yield = new LinkedList<>();
        Stack<IndexedWord> fringe = new Stack<>();
        while (!fringe.isEmpty()) {
            IndexedWord parent = fringe.pop();
            for (SemanticGraphEdge edge : outgoingEdgeIterable(parent)) {
                if (!edge.isExtra()) {


        return yield;

    /**
     * Store a comment line with this semantic graph.
     *
     * @param comment The comment line to store
     */
    public void addComment(String comment) {

    /**
     * Return the list of comments stored with this graph.
     *
     * @return A list of comments.
     */
    public List<String> getComments() {
        return this.comments;

    // Version identifier used by Java serialization.
    // NOTE(review): presumably the enclosing class is Serializable; its declaration is not visible here -- confirm.
    private static final long serialVersionUID = 1L;
