eu.interedition.collatex.nmerge.graph.MaximalUniqueMatch.java Source code

Java tutorial

Introduction

Here is the source code for eu.interedition.collatex.nmerge.graph.MaximalUniqueMatch.java

Source

/*
 * NMerge is Copyright 2009-2011 Desmond Schmidt
 *
 * This file is part of NMerge. NMerge is a Java library for merging
 * multiple versions into multi-version documents (MVDs), and for
 * reading, searching and comparing them.
 *
 * NMerge is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package eu.interedition.collatex.nmerge.graph;

import eu.interedition.collatex.Witness;
import eu.interedition.collatex.nmerge.Errors;
import eu.interedition.collatex.nmerge.exception.MVDException;
import eu.interedition.collatex.suffixtree.SuffixTree;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import java.util.*;

import static java.util.Collections.singleton;

/**
 * Represent the Maximal Unique Match between a new version
 * and a Variant Graph
 *
 * @author Desmond Schmidt 29/10/08
 */
public class MaximalUniqueMatch<T> implements Comparable<MaximalUniqueMatch<T>> {
    /**
     * when true, findDirectMUM verifies the subgraph before searching it
     */
    private static boolean debug;

    //static int totalHashSize;
    //static int numberOfHashes;
    //static int maxHashSize;
    /**
     * initial capacity of the sets that track nodes with printed arcs
     */
    static int PRINTED_HASH_SIZE = 128;
    /**
     * the arc we are the MUM of
     */
    VariantGraphSpecialArc<T> arc;
    /**
     * the immediately opposite graph to align with
     */
    VariantGraph<T> graph;
    /**
     * the left hand part of the arc left over after alignment
     */
    private VariantGraphSpecialArc<T> leftSubArc;
    /**
     * the right hand part of the arc left over after alignment
     */
    private VariantGraphSpecialArc<T> rightSubArc;
    /**
     * special arcs on the left that need reMUMing
     */
    private Queue<VariantGraphSpecialArc<T>> leftSpecialArcs;
    /**
     * special arcs on the right that need reMUMing
     */
    private Queue<VariantGraphSpecialArc<T>> rightSpecialArcs;
    /**
     * The left hand subgraph after alignment
     */
    VariantGraph<T> leftSubGraph;
    /**
     * The right hand subgraph after alignment
     */
    VariantGraph<T> rightSubGraph;
    /**
     * The final Match; filled in lazily from the table of candidates
     */
    VariantGraphMatch<T> match;
    /**
     * the version of the new version (first entry of the arc's version set)
     */
    Witness version;
    /**
     * store candidate MUMs here; each candidate is mapped to itself so that
     * an equal candidate found later can update the stored one's frequency
     */
    HashMap<VariantGraphMatch<T>, VariantGraphMatch<T>> table;
    /**
     * are we transposed?
     */
    boolean transposed;
    /**
     * transposing on the left? (only meaningful when transposed is true)
     */
    boolean transposeLeft;
    /**
     * minimum length of a MUM; leftover subarcs shorter than this are
     * not queued for further matching
     */
    static final int MIN_LEN = 2;
    /**
     * initial capacity of the candidate table
     */
    static final int INITIAL_QUEUE_LEN = 128;
    /**
     * golden ratio - used as the exponent when computing the
     * transposition threshold and search distance
     */
    static final double PHI = 1.61803399d;

    /**
     * Create a MUM for a special arc against a graph. The version is the
     * first entry of the arc's version set (or {@code null} if that set is
     * empty) and the table of candidate matches starts out empty.
     *
     * @param arc        the current or final arc not included yet in path
     * @param graph      the graph to direct align to
     * @param transposed if true we are transposed
     */
    MaximalUniqueMatch(VariantGraphSpecialArc<T> arc, VariantGraph<T> graph, boolean transposed) {
        this.table = new HashMap<VariantGraphMatch<T>, VariantGraphMatch<T>>(INITIAL_QUEUE_LEN);
        this.transposed = transposed;
        this.graph = graph;
        this.arc = arc;
        this.version = Iterables.getFirst(arc.versions, null);
    }

    /**
     * Record a candidate match in the table. Transposed candidates are only
     * recorded when they fall within the threshold; direct candidates are
     * always recorded. A candidate equal to one already in the table bumps
     * the stored candidate's frequency unless the two overlap.
     *
     * @param start         the nearest node after which the match starts
     * @param graphOffset   the start offset from start in bytes
     * @param matchVersions the versions followed throughout the match
     * @param dataOffset    offset within arc where the alignment starts
     * @param length        the overall path length in bytes
     * @param distance      the distance between graph end and the match
     */
    void update(VariantGraphNode<T> start, int graphOffset, Set<Witness> matchVersions, int dataOffset, int length,
            int distance) {
        if (transposed) {
            // on the left the distance is measured to the near edge of the match
            int effectiveDistance = transposeLeft ? distance - length : distance;
            if (!withinThreshold(effectiveDistance, dataOffset, length)) {
                return;
            }
        }
        Set<Witness> candidates = Sets.newHashSet();
        candidates.addAll(matchVersions);
        if (!transposed) {
            // a direct alignment may only use versions of the opposite graph
            candidates.retainAll(graph.constraint);
        }
        // the match is filed under the first of the remaining versions
        Witness chosen = Preconditions.checkNotNull(Iterables.getFirst(candidates, null));
        VariantGraphMatch<T> candidate = new VariantGraphMatch<T>(start, graphOffset, chosen, dataOffset, length,
                arc.data);
        VariantGraphMatch<T> existing = table.get(candidate);
        if (existing == null) {
            table.put(candidate, candidate);
            return;
        }
        // overlapping candidates are the same data sharing part of the same
        // path, i.e. really the same match: only count distinct occurrences
        if (!candidate.overlaps(existing)) {
            existing.freq++;
        }
    }

    /**
     * Get the length of this MUM, looking up the best match first if that
     * has not happened yet.
     *
     * @return the length of the matched data, or 0 if there is no match
     */
    int length() {
        if (match != null) {
            return match.length;
        }
        match = getBestMatch();
        if (match == null) {
            return 0;
        }
        return match.length;
    }

    /**
     * Get the match. If it is not already set, fetch it from the table.
     *
     * @return the best match, or null if the table holds no unique match
     */
    public VariantGraphMatch<T> getMatch() {
        if (match != null) {
            return match;
        }
        match = getBestMatch();
        return match;
    }

    /**
     * Retrieve the best match from the table: the longest candidate whose
     * frequency is exactly 1. Candidates seen more than once are not unique
     * and are ignored. When several unique candidates share the greatest
     * length, the one encountered first while iterating the table wins.
     *
     * @return the longest unique match, or null if there is none
     */
    private VariantGraphMatch<T> getBestMatch() {
        VariantGraphMatch<T> biggest = null;
        for (VariantGraphMatch<T> m : table.keySet()) {
            if (m.freq == 1 && (biggest == null || m.length > biggest.length)) {
                biggest = m;
            }
        }
        return biggest;
    }

    /**
     * Get the part of the graph to the left of the aligned section.
     * Only meaningful after merge. For a transposition this is simply
     * the original graph.
     *
     * @return the left subgraph or null if none
     */
    public VariantGraph<T> getLeftSubgraph() {
        return this.leftSubGraph;
    }

    /**
     * Get the part of the graph to the right of the aligned section.
     * Only meaningful after merge.
     *
     * @return the right subgraph or null if none
     */
    public VariantGraph<T> getRightSubgraph() {
        return this.rightSubGraph;
    }

    /**
     * Get the portion of the arc that precedes the aligned section.
     * Only meaningful after merge.
     *
     * @return the left-hand part of the arc or null if none
     */
    public VariantGraphSpecialArc<T> getLeftSubarc() throws Exception {
        return this.leftSubArc;
    }

    /**
     * Get the portion of the arc that follows the aligned section.
     * Only meaningful after merge.
     *
     * @return the right-hand part of the arc, maybe null
     */
    public VariantGraphSpecialArc<T> getRightSubarc() throws Exception {
        return this.rightSubArc;
    }

    /**
     * Decide whether a proposed transpose match is close enough to accept.
     * The match is accepted when its length raised to the power PHI exceeds
     * the gap between the match and the position it would be transposed to.
     *
     * @param distance   the distance from the end of the graph to
     *                   the edge of the match
     * @param dataOffset offset within data where the match starts
     * @param length     the length of the match that was found
     * @return true if it is within the threshold, false otherwise
     */
    boolean withinThreshold(int distance, int dataOffset, int length) {
        double reach = Math.pow((double) length, PHI);
        int gap;
        if (transposeLeft) {
            // add the arc data that precedes the match
            gap = distance + dataOffset;
        } else {
            // transposing on the right: add the arc data that follows the match
            gap = distance + arc.dataLen() - (dataOffset + length);
        }
        return reach > (double) gap;
    }

    /**
     * Find a direct MUM by comparing a special arc with its immediately
     * opposite subgraph. The subgraph is walked from its start node using a
     * FIFO queue; for every character position of every eligible arc a
     * MatchThreadDirect is run, which reports candidates to the new MUM.
     * Nodes touched along the way are remembered and reset before returning.
     *
     * @param special  the special arc to find the MUM of
     * @param st       the suffix tree made from special
     * @param subGraph the subgraph directly opposite it
     * @return the best MUM or null if no match of positive length was found
     * @throws MVDException declared by this method; presumably raised by the
     *                      graph verification or the match threads - confirm
     */
    public static <T> MaximalUniqueMatch<T> findDirectMUM(VariantGraphSpecialArc<T> special, SuffixTree<T> st,
            VariantGraph<T> subGraph) throws MVDException {
        MaximalUniqueMatch<T> mum = new MaximalUniqueMatch<T>(special, subGraph, false);
        // nodes whose printed-arc state must be reset when we are done
        HashSet<VariantGraphNode<T>> printedNodes = new HashSet<VariantGraphNode<T>>(PRINTED_HASH_SIZE);
        Queue<VariantGraphNode<T>> queue = new ArrayDeque<VariantGraphNode<T>>();
        queue.add(subGraph.start);
        VariantGraphArc<T> lastArc = null;
        printedNodes.add(subGraph.start);
        // arcs leaving the start node begin with no previous characters
        List<PrevChar<T>> prevChars = Lists.newArrayList();
        if (debug) {
            subGraph.verify();
        }
        while (!queue.isEmpty()) {
            VariantGraphNode<T> node = queue.poll();
            ListIterator<VariantGraphArc<T>> iter = node.outgoingArcs(subGraph);
            while (iter.hasNext()) {
                VariantGraphArc<T> a = iter.next();
                // skip empty arcs and parents that have a child in the new version
                if (a.dataLen() > 0 && (!a.isParent() || !a.hasChildInVersion(mum.version))) {
                    // first character
                    MatchThreadDirect<T> mtd;
                    List<T> data = a.getData();
                    // NOTE(review): for an arc leaving subGraph.start, prevChars keeps
                    // whatever value the previously processed arc left behind; only the
                    // very first such arc sees the empty list - confirm this is intended
                    if (a.from != subGraph.start) {
                        prevChars = a.from.getPrevChars(subGraph.constraint, subGraph.start);
                    }
                    mtd = new MatchThreadDirect<T>(mum, subGraph, st, a, a.from, 0, prevChars, subGraph.end);
                    mtd.run();
                    if (data.size() > 1) {
                        // this list is always replaced on the first pass of the loop below
                        prevChars = Lists.newArrayListWithExpectedSize(1);
                        // versions of this arc that belong to the subgraph
                        Set<Witness> prevVersions = Sets.newHashSet();
                        prevVersions.addAll(a.versions);
                        prevVersions.retainAll(subGraph.constraint);
                        // other (1 to N) characters: the previous character is
                        // simply the preceding element of this arc's data
                        for (int i = 1; i < data.size(); i++) {
                            prevChars = Lists.newArrayList(
                                    Collections.singleton(new PrevChar<T>(prevVersions, data.get(i - 1))));
                            mtd = new MatchThreadDirect<T>(mum, subGraph, st, a, a.from, i, prevChars,
                                    subGraph.end);
                            mtd.run();
                        }
                    }
                }
                // mark the arc as visited on its target node
                a.to.printArc(a);
                printedNodes.add(a.to);
                // only continue from a node once all its incoming arcs
                // within the subgraph have been visited
                if (a.to != subGraph.end && a.to.allPrintedIncoming(subGraph.constraint)) {
                    queue.add(a.to);
                }
                lastArc = a;
            }
        }
        // NOTE(review): lastArc is still null if the start node had no outgoing
        // arcs in the subgraph, in which case this assert fails with a
        // NullPointerException when assertions are enabled
        assert lastArc.to == subGraph.end;
        clearPrintedArcs(printedNodes);
        if (mum.length() > 0) {
            return mum;
        } else {
            return null;
        }
    }

    /**
     * Check that following an arc does not lead back to a node that has
     * already been seen. Recursion follows every outgoing arc of the arc's
     * target node and stops after {@code limit} levels.
     *
     * @param list  the list of previously seen nodes; nodes visited here
     *              are appended to it
     * @param a     the arc to check for cycles
     * @param limit number of times to recurse before giving up
     * @throws MVDException if the target of an examined arc is already in list
     */
    static <T> void checkForCycle(Queue<VariantGraphNode<T>> list, VariantGraphArc<T> a, int limit)
            throws MVDException {
        VariantGraphNode<T> target = a.to;
        if (list.contains(target)) {
            throw new MVDException("Cycle: node" + target.nodeId + " already encountered");
        }
        if (limit <= 0) {
            return;
        }
        ListIterator<VariantGraphArc<T>> outgoing = target.outgoingArcs();
        while (outgoing.hasNext()) {
            VariantGraphArc<T> next = outgoing.next();
            // the target is recorded once per outgoing arc, before descending
            list.add(target);
            checkForCycle(list, next, limit - 1);
        }
    }

    /**
     * Simple debug routine: test whether the bytes of a string occur in
     * data starting at a given index. The string is converted to bytes
     * using the platform's default charset.
     *
     * @param data      the raw data to compare with a string
     * @param i         the index into data to start at
     * @param compareTo the string to compare the bytes of data to
     * @return true if they matched the whole length of compareTo
     */
    static boolean compareBytes(byte[] data, int i, String compareTo) {
        byte[] c = compareTo.getBytes();
        int j = 0;
        // advance only past bytes that really match, so that a mismatch
        // on the final byte is not mistaken for a complete match
        while (j < c.length && i + j < data.length && data[i + j] == c[j]) {
            j++;
        }
        return j == c.length;
    }

    /**
     * Find the left transpose MUM by comparing a special arc with
     * the graph to the left of the immediately opposite subgraph.
     *
     * @param special  the special arc to find the transpose MUM of
     * @param st       the suffix tree made from special
     * @param subGraph the subgraph directly opposite it
     * @return the best left transpose MUM or null
     */
    public static <T> MaximalUniqueMatch<T> findLeftTransposeMUM(VariantGraphSpecialArc<T> special,
            SuffixTree<T> st, VariantGraph<T> subGraph) {
        MaximalUniqueMatch<T> result = new MaximalUniqueMatch<T>(special, subGraph, true);
        result.transposeLeft = true;
        // how far back we may look: the arc length raised to the power PHI ...
        int budget = Math.round((float) Math.pow(special.dataLen(), PHI));
        // ... less whatever lies between the start of the special arc and
        // the start of the subgraph along the new version's path
        Witness witness = Iterables.getFirst(special.versions, null);
        VariantGraphArc<T> cursor = special;
        while (!cursor.from.equals(subGraph.start)) {
            cursor = cursor.from.pickIncomingArc(witness);
            budget -= cursor.dataLen();
        }
        // search backwards from the subgraph start along *all* paths
        findLeftPositions(result, st, subGraph.start, budget);
        if (result.getMatch() == null) {
            return null;
        }
        return result;
    }

    /**
     * Find positions in the graph to start looking for transpositions.
     * Proceed backwards from the given node along incoming arcs, using a
     * FIFO queue of nodes. For each character position of each eligible arc
     * that lies within the search distance a MatchThreadTransposeLeft is
     * run, which reports candidates to the MUM. Arcs that have been
     * traversed are marked via the outgoing print-arc facility of node, and
     * all marked nodes are reset before returning. Arcs that carry the
     * MUM's own version are not matched against.
     *
     * @param mum      the transpose mum to build
     * @param st       the suffix tree of the special arc
     * @param node     node to look backwards from
     * @param distance the distance to search left in bytes
     */
    static <T> void findLeftPositions(MaximalUniqueMatch<T> mum, SuffixTree<T> st, VariantGraphNode<T> node,
            int distance) {
        // nodes whose printed-arc state must be reset when we are done
        HashSet<VariantGraphNode<T>> printedNodes = new HashSet<VariantGraphNode<T>>(PRINTED_HASH_SIZE);
        Queue<VariantGraphNode<T>> queue = new ArrayDeque<VariantGraphNode<T>>();
        int travelled = 0;
        Witness mumV = mum.version;
        VariantGraphNode<T> origin = node;
        //BitSet range = new BitSet();
        queue.add(node);
        node.setShortestPath(0);
        while (!queue.isEmpty()) {
            node = queue.poll();
            // ALL of the incoming arcs are within range
            //range.or( node.getIncomingSet() );
            // the shortest path to get to this node
            int shortestPath = node.getShortestPath();
            ListIterator<VariantGraphArc<T>> iter = node.incomingArcs();
            while (iter.hasNext()) {
                VariantGraphArc<T> a = iter.next();
                // skip empty arcs, arcs of the new version itself, and
                // parents that have a child in the new version
                if (a.dataLen() > 0 && !a.versions.contains(mumV)
                        && (!a.isParent() || !a.hasChildInVersion(mumV))) {
                    MatchThreadTransposeLeft<T> mtt;
                    // lowest index of this arc's data that is still within distance
                    int limit;
                    if (a.dataLen() + shortestPath < distance) {
                        // the whole arc is within range
                        limit = 0;
                    } else {
                        // only the tail of the arc is within range
                        limit = (a.dataLen() + shortestPath) - distance;
                    }
                    // distance travelled in the current arc
                    travelled = 0;
                    List<PrevChar<T>> prevChars = Lists.newArrayListWithExpectedSize(1);
                    // walk the arc's data from its last element backwards
                    for (int i = a.dataLen() - 1; i >= limit; i--) {
                        // if all previous arcs are valid, so too
                        // are all previous chars of those arcs
                        if (i > 0) {
                            prevChars = Lists.newArrayList(
                                    Collections.singleton(new PrevChar<T>(a.versions, a.getData().get(i - 1))));
                        } else {
                            // at the first element the previous characters
                            // come from the arcs entering a.from
                            prevChars = a.from.getPrevChars();
                        }
                        mtt = new MatchThreadTransposeLeft<T>(mum, st, a, i, prevChars, shortestPath + travelled,
                                origin);
                        mtt.run();
                        travelled++;
                    }
                }
                // finished with this arc: mark it on its source node
                // NOTE(review): shortestPath is passed here, whereas findRightPositions
                // passes shortestPath + a.dataLen() - confirm which is intended
                a.from.printOutgoingArc(a, shortestPath);
                // keep track of nodes with printed arcs
                printedNodes.add(a.from);
                // NOTE(review): travelled is only reset inside the branch above, so for a
                // skipped arc it still holds the count left by an earlier arc;
                // findRightPositions uses a.dataLen() in the corresponding test
                if (distance - (shortestPath + travelled) > 0 && a.from.indegree() > 0
                        && a.from.allPrintedOutgoing()) {
                    queue.add(a.from);
                }
            }
        }
        clearPrintedArcs(printedNodes);
    }

    /**
     * Reset every node in the set, clearing the printed-arc marks left on
     * it by one of the searches.
     *
     * @param printedNodes a set of nodes with some arcs printed.
     */
    static <T> void clearPrintedArcs(HashSet<VariantGraphNode<T>> printedNodes) {
        for (VariantGraphNode<T> marked : printedNodes) {
            marked.reset();
        }
    }

    /**
     * Find the right transpose MUM by comparing a special arc with the rest
     * of the overall graph to the right of the immediately opposite subgraph.
     *
     * @param special  the special arc to find the transpose MUM of
     * @param st       the suffix tree made from special
     * @param subGraph the subgraph directly opposite it
     * @return the best right transpose MUM or null
     */
    public static <T> MaximalUniqueMatch<T> findRightTransposeMUM(VariantGraphSpecialArc<T> special,
            SuffixTree<T> st, VariantGraph<T> subGraph) {
        MaximalUniqueMatch<T> result = new MaximalUniqueMatch<T>(special, subGraph, true);
        // how far forward we may look: the arc length raised to the power PHI ...
        int budget = Math.round((float) Math.pow(special.dataLen(), PHI));
        // ... less whatever lies between the end of the special arc and
        // the end of the subgraph along the new version's path
        Witness witness = Iterables.getFirst(special.versions, null);
        VariantGraphArc<T> cursor = special;
        while (!cursor.to.equals(subGraph.end)) {
            cursor = cursor.to.pickOutgoingArc(witness);
            budget -= cursor.dataLen();
        }
        // search forwards from the subgraph end along *all* paths
        findRightPositions(result, st, subGraph.end, budget);
        if (result.getMatch() == null) {
            return null;
        }
        return result;
    }

    /**
     * Find positions in the graph to start looking for transpositions
     * on the right of the subgraph. Proceed forwards from the given node
     * along outgoing arcs, using a FIFO queue of nodes. For each character
     * position of each eligible arc a MatchThreadTransposeRight is run,
     * which reports candidates to the MUM. Arcs that carry the MUM's own
     * version are not matched against. All nodes marked during the search
     * are reset before returning.
     *
     * @param mum      the MUM to update
     * @param st       the suffix tree to lookup matches in
     * @param node     the node to start from
     * @param distance the distance to search forwards
     */
    static <T> void findRightPositions(MaximalUniqueMatch<T> mum, SuffixTree<T> st, VariantGraphNode<T> node,
            int distance) {
        Queue<VariantGraphNode<T>> queue = new ArrayDeque<VariantGraphNode<T>>();
        // nodes whose printed-arc state must be reset when we are done
        HashSet<VariantGraphNode<T>> printedNodes = new HashSet<VariantGraphNode<T>>(PRINTED_HASH_SIZE);
        VariantGraphNode<T> origin = node;
        // versions seen on the outgoing side of every node visited so far
        Set<Witness> range = Sets.newHashSet();
        int travelled = 0;
        Witness mumV = mum.version;
        printedNodes.add(node);
        queue.add(node);
        node.setShortestPath(0);
        while (!queue.isEmpty()) {
            node = queue.poll();
            // ALL of the outgoing arcs of this node are within range
            range.addAll(node.getOutgoingSet());
            // the shortest path to get to this node
            int shortestPath = node.getShortestPath();
            ListIterator<VariantGraphArc<T>> iter = node.outgoingArcs();
            while (iter.hasNext()) {
                VariantGraphArc<T> a = iter.next();
                // skip empty arcs, arcs of the new version itself, and
                // parents that have a child in the new version
                if (a.dataLen() > 0 && !a.versions.contains(mumV)
                        && (!a.isParent() || !a.hasChildInVersion(mumV))) {
                    MatchThreadTransposeRight<T> mttr;
                    List<PrevChar<T>> prevChars;
                    // number of bytes to travel in this arc
                    int limit;
                    if (a.dataLen() + shortestPath < distance) {
                        // the whole arc is within range
                        limit = a.dataLen();
                    } else {
                        // only the head of the arc is within range
                        limit = distance - shortestPath;
                    }
                    // distance travelled in this arc
                    travelled = 0;
                    // the 1st time there are NO prevchars
                    prevChars = (a.from == origin) ? Lists.<PrevChar<T>>newArrayList() : a.from.getPrevChars();
                    // process the first character separately
                    // because it needs different prevChars
                    // NOTE(review): this first thread runs even when limit is zero or
                    // negative (distance already used up) - confirm this is intended
                    mttr = new MatchThreadTransposeRight<T>(mum, st, a, 0, prevChars, shortestPath + travelled,
                            null);
                    mttr.run();
                    travelled++;
                    // from here on the previous character is the preceding
                    // element of this arc's own data
                    prevChars = Lists.newArrayList(singleton(new PrevChar<T>(a.versions, a.getData().get(0))));
                    for (int i = 1; i < limit; i++) {
                        mttr = new MatchThreadTransposeRight<T>(mum, st, a, i, prevChars, shortestPath + travelled,
                                null);
                        mttr.run();
                        prevChars = Lists.newArrayList(singleton(new PrevChar<T>(a.versions, a.getData().get(i))));
                        travelled++;
                    }
                }
                // finished with this arc: record the path length to its far end
                a.to.printArc(a, shortestPath + a.dataLen());
                // keep track of nodes with printed arcs
                printedNodes.add(a.to);
                // continue from the target only if distance remains, it has
                // somewhere to go, and all its in-range incoming arcs are done
                if (distance - (shortestPath + a.dataLen()) > 0 && a.to.outdegree() > 0
                        && a.to.allPrintedIncoming(range)) {
                    queue.add(a.to);
                }
            }
        }
        // important: clean up all printed arcs
        clearPrintedArcs(printedNodes);
    }

    /**
     * Carry out the merge already calculated: split off the unmatched ends
     * of the arc, determine the left and right subgraphs, merge the matched
     * section into the graph (as a transposition or a direct alignment) and
     * queue the leftover subarcs for further matching.
     *
     * @throws MVDException          if one of the merge steps fails
     * @throws IllegalStateException if there is no unique match to merge
     */
    public void merge() throws MVDException {
        if (match == null) {
            match = getBestMatch();
        }
        if (match == null) {
            // the table held no unique candidate, so there is nothing to merge
            throw new IllegalStateException("merge() called on a MUM that has no unique match");
        }
        // create left and right subarcs
        if (match.dataOffset > 0) {
            leftSubArc = splitOffLeftArc();
        }
        if (match.length + match.dataOffset < arc.dataLen()) {
            rightSubArc = splitOffRightArc();
        }
        // create left and right subgraphs
        if (transposed) {
            // for a transposition the leftovers are matched
            // against the original graph again
            if (match.dataOffset > 0) {
                leftSubGraph = graph;
            }
            if (match.length + match.dataOffset < arc.dataLen()) {
                rightSubGraph = graph;
            }
            transposeMerge();
        } else {
            createLeftSubGraph();
            createRightSubGraph();
            alignMerge();
        }
        // add left and right subarcs to the special left & right sets
        addSubArcsToSpecials();
    }

    /**
     * Do an align type merge: collect the neighbouring special arcs,
     * detach the arc's residual paths on both sides, hook those paths up
     * to the left and right subgraphs where needed, and finally add the
     * new version to the matched section of the graph.
     *
     * @throws MVDException if one of the merge steps fails
     */
    private void alignMerge() throws MVDException {
        // 1. remember the special arcs on either side of the arc
        getSpecialArcs();
        // 2. isolate the residual paths to the left and right of the match
        VariantGraphNode<T> leftResidualEnd = normaliseLeftResidualPath();
        VariantGraphNode<T> rightResidualStart = normaliseRightResidualPath();
        // 3. attach each residual path to its subgraph, unless it already
        // ends (or starts) at the boundary of the graph
        if (leftResidualEnd != graph.start) {
            moveIncomingArcs(leftResidualEnd, leftSubGraph.end);
        }
        if (rightResidualStart != graph.end) {
            moveOutgoingArcs(rightResidualStart, rightSubGraph.start);
        }
        // 4. add version to the aligned section
        match.addVersion(version);
    }

    /**
     * Queue the left and right subarcs on the left and right special
     * sets so they can be MUMified later. Subarcs shorter than MIN_LEN
     * are not queued. The queues are created on first use.
     */
    private void addSubArcsToSpecials() {
        final VariantGraphSpecialArc<T> left = leftSubArc;
        if (left != null && left.dataLen() >= MIN_LEN) {
            if (leftSpecialArcs == null) {
                leftSpecialArcs = new ArrayDeque<VariantGraphSpecialArc<T>>();
            }
            leftSpecialArcs.add(left);
        }
        final VariantGraphSpecialArc<T> right = rightSubArc;
        if (right != null && right.dataLen() >= MIN_LEN) {
            if (rightSpecialArcs == null) {
                rightSpecialArcs = new ArrayDeque<VariantGraphSpecialArc<T>>();
            }
            rightSpecialArcs.add(right);
        }
    }

    /**
     * Get the special arcs to the left of the main special arc.
     *
     * @return the queue of left special arcs for reMUMing; null if
     *         none was ever created
     */
    public Queue<VariantGraphSpecialArc<T>> getLeftSpecialArcs() {
        return this.leftSpecialArcs;
    }

    /**
     * Get the special arcs to the right of the main special arc.
     *
     * @return the queue of right special arcs for reMUMing; null if
     *         none was ever created
     */
    public Queue<VariantGraphSpecialArc<T>> getRightSpecialArcs() {
        return this.rightSpecialArcs;
    }

    /**
     * Return the special arc this MUM was built for, in case the MUM
     * has to be recomputed.
     *
     * @return the special arc of the new version
     */
    public VariantGraphSpecialArc<T> getArc() {
        return this.arc;
    }

    /**
     * Return the graph this MUM was built against, in case the MUM
     * has to be recomputed.
     *
     * @return the original graph we were aligned to
     */
    public VariantGraph<T> getGraph() {
        return this.graph;
    }

    /**
     * Collect the other special arcs on either side of the main special
     * arc by following the new version's path outwards to the start and
     * the end of the graph. Usually there are none; there will only be
     * some if a transposition of arcs spanning this graph was carried out.
     * A queue is created for a side as soon as the arc does not touch the
     * graph boundary on that side, even if no special arc is found there.
     */
    private void getSpecialArcs() {
        // walk left from the arc to the start of the graph
        VariantGraphNode<T> cursor = arc.from;
        if (cursor != graph.start && leftSpecialArcs == null) {
            leftSpecialArcs = new ArrayDeque<VariantGraphSpecialArc<T>>();
        }
        while (cursor != graph.start) {
            VariantGraphArc<T> incoming = cursor.pickIncomingArc(version);
            if (incoming instanceof VariantGraphSpecialArc<?>) {
                leftSpecialArcs.add((VariantGraphSpecialArc<T>) incoming);
            }
            cursor = incoming.from;
        }
        // walk right from the arc to the end of the graph
        cursor = arc.to;
        if (cursor != graph.end && rightSpecialArcs == null) {
            rightSpecialArcs = new ArrayDeque<VariantGraphSpecialArc<T>>();
        }
        while (cursor != graph.end) {
            VariantGraphArc<T> outgoing = cursor.pickOutgoingArc(version);
            if (outgoing instanceof VariantGraphSpecialArc<?>) {
                rightSpecialArcs.add((VariantGraphSpecialArc<T>) outgoing);
            }
            cursor = outgoing.to;
        }
    }

    /**
     * Normalise the left-hand side of the special arc. Try to make it so
     * that the left residual arc exists and spans the left subgraph. Only
     * when there is neither a left subgraph nor a left residual path is
     * nothing created here. This is NOT called for transpositions.
     *
     * @return the node at the end of the left residual path, unattached to
     *         anything to the right
     * @throws MVDException if creating the empty subgraph or arc fails
     */
    private VariantGraphNode<T> normaliseLeftResidualPath() throws MVDException {
        VariantGraphNode<T> residualEnd = arc.from;
        // detach the special arc from its source node
        residualEnd.removeOutgoing(arc);
        if (leftSubArc != null) {
            // the left subarc takes the arc's place and ends in a fresh node
            residualEnd.addOutgoing(leftSubArc);
            residualEnd = new VariantGraphNode<T>();
            residualEnd.addIncoming(leftSubArc);
        }
        boolean atGraphStart = residualEnd == graph.start;
        if (leftSubGraph == null && !atGraphStart) {
            // a residual path without a subgraph: create an empty one to span it
            leftSubGraph = createEmptyLeftSubgraph();
        } else if (leftSubGraph != null && atGraphStart) {
            // a subgraph without a residual path: create an empty arc to span it
            VariantGraphArc<T> filler = createEmptyArc(version);
            residualEnd.addOutgoing(filler);
            residualEnd = new VariantGraphNode<T>();
            residualEnd.addIncoming(filler);
        }
        return residualEnd;
    }

    /**
     * Normalise the right-hand side of the special arc. Try to make it so
     * that the right residual arc exists and spans the right subgraph. Only
     * when there is neither a right subgraph nor a right residual path is
     * nothing created here.
     *
     * @return the node at the start of the right residual path, unattached to
     *         anything to the left
     * @throws MVDException if creating the empty subgraph or arc fails
     */
    private VariantGraphNode<T> normaliseRightResidualPath() throws MVDException {
        VariantGraphNode<T> residualStart = arc.to;
        // detach the special arc from its target node
        residualStart.removeIncoming(arc);
        if (rightSubArc != null) {
            // the right subarc takes the arc's place and starts from a fresh node
            residualStart.addIncoming(rightSubArc);
            residualStart = new VariantGraphNode<T>();
            residualStart.addOutgoing(rightSubArc);
        }
        boolean atGraphEnd = residualStart == graph.end;
        if (rightSubGraph == null && !atGraphEnd) {
            // a residual path without a subgraph: create an empty one to span it
            rightSubGraph = createEmptyRightSubgraph();
        } else if (rightSubGraph != null && atGraphEnd) {
            // a subgraph without a residual path: create an empty arc to span it
            VariantGraphArc<T> filler = createEmptyArc(version);
            residualStart.addIncoming(filler);
            residualStart = new VariantGraphNode<T>();
            residualStart.addOutgoing(filler);
        }
        return residualStart;
    }

    /**
     * We don't have a leftSubGraph but because there is a leftSubArc
     * or residual left path we must create an empty subgraph to span it.
     *
     * @return an empty subgraph
     */
    private VariantGraph<T> createEmptyLeftSubgraph() throws MVDException {
        final VariantGraphNode<T> subgraphEnd = new VariantGraphNode<T>();
        // versions of the bridge arc: everything found at graph.start except our own version
        final Set<Witness> otherVersions = Sets.newHashSet(graph.start.getVersions());
        otherVersions.remove(version);
        assert !otherVersions.isEmpty();
        final VariantGraphArc<T> ownArc = graph.start.pickOutgoingArc(version);
        assert (ownArc != null && ownArc.versions.size() == 1);
        // take our own arc out of the way so that it is not relocated with the others ...
        graph.start.removeOutgoing(ownArc);
        moveOutgoingArcs(graph.start, subgraphEnd);
        // ... and hang it back onto graph.start afterwards
        graph.start.addOutgoing(ownArc);
        // an empty arc bridges graph.start and the new node
        final VariantGraphArc<T> bridge = new VariantGraphArc<T>(otherVersions, Lists.<T>newArrayList());
        graph.start.addOutgoing(bridge);
        subgraphEnd.addIncoming(bridge);
        return new VariantGraph<T>(graph.start, subgraphEnd, bridge.versions, graph.position);
    }

    /**
     * We don't have a rightSubGraph but because there is a rightSubArc or
     * residual right path we must create an empty sub graph to span it.
     *
     * @return an empty subgraph
     */
    private VariantGraph<T> createEmptyRightSubgraph() throws MVDException {
        final VariantGraphNode<T> subgraphStart = new VariantGraphNode<T>();
        // versions of the bridge arc: everything found at graph.end except our own version
        final Set<Witness> otherVersions = Sets.newHashSet(graph.end.getVersions());
        otherVersions.remove(version);
        assert !otherVersions.isEmpty();
        final VariantGraphArc<T> ownArc = graph.end.pickIncomingArc(version);
        assert (ownArc != null && ownArc.versions.size() == 1);
        // take our own arc out of the way so that it is not relocated with the others ...
        graph.end.removeIncoming(ownArc);
        moveIncomingArcs(graph.end, subgraphStart);
        // ... and hang it back onto graph.end afterwards
        graph.end.addIncoming(ownArc);
        // an empty arc bridges the new node and graph.end
        final VariantGraphArc<T> bridge = new VariantGraphArc<T>(otherVersions, Lists.<T>newArrayList());
        graph.end.addIncoming(bridge);
        subgraphStart.addOutgoing(bridge);
        // the subgraph is positioned just past the matched region of the special arc
        return new VariantGraph<T>(subgraphStart, graph.end, bridge.versions,
                arc.position + match.dataOffset + match.length);
    }

    /**
     * Move the outgoing arcs of a node to another node.
     *
     * @param from the node from which to remove outgoing arcs
     * @param to   the node to which to move the outgoing arcs
     */
    private void moveOutgoingArcs(VariantGraphNode<T> from, VariantGraphNode<T> to) throws MVDException {
        // keep detaching the first remaining outgoing arc until none are left
        while (!from.isOutgoingEmpty()) {
            to.addOutgoing(from.removeOutgoing(0));
        }
        // a node that still has arcs on either side keeps its matches
        if (from.indegree() != 0 || from.outdegree() != 0) {
            return;
        }
        // the source node is isolated now, so its matches go to the target as well
        from.moveMatches(to);
    }

    /**
     * Move the incoming arcs of a node to another node.
     *
     * @param from the node from which to remove incoming arcs
     * @param to   the node to which to move the incoming arcs
     */
    private void moveIncomingArcs(VariantGraphNode<T> from, VariantGraphNode<T> to) throws MVDException {
        // keep detaching the first remaining incoming arc until none are left
        while (!from.isIncomingEmpty()) {
            to.addIncoming(from.removeIncoming(0));
        }
        // a node that still has arcs on either side keeps its matches
        if (from.indegree() != 0 || from.outdegree() != 0) {
            return;
        }
        // the source node is isolated now, so its matches go to the target as well
        from.moveMatches(to);
    }

    /**
     * Merge a transposed MUM. In this case we merge the special arc
     * with a subgraph spanned by some other special arc or arcs. On
     * entry leftSubArc and rightSubArc are set, but may be null.
     * They are not attached to anything.
     */
    private void transposeMerge() throws MVDException {
        // detach the special arc on its left side and hook in leftSubArc, if there is one
        VariantGraphNode<T> tail = arc.from;
        tail.removeOutgoing(arc);
        if (leftSubArc != null) {
            tail.addOutgoing(leftSubArc);
            tail = new VariantGraphNode<T>();
            tail.addIncoming(leftSubArc);
        }
        // likewise on the right side with rightSubArc
        VariantGraphNode<T> head = arc.to;
        head.removeIncoming(arc);
        if (rightSubArc != null) {
            head.addIncoming(rightSubArc);
            head = new VariantGraphNode<T>();
            head.addOutgoing(rightSubArc);
        }
        // chain one child arc per arc of the matched path between tail and head
        final VariantGraphArc<T>[] matchPath = match.getMatchPath();
        final int last = matchPath.length - 1;
        int index = 0;
        for (VariantGraphArc<T> parent : matchPath) {
            final VariantGraphArc<T> child = new VariantGraphArc<T>(Sets.newHashSet(version), parent);
            tail.addOutgoing(child);
            // a parent that already carries our version is reported, but merging continues
            if (parent.versions.contains(version)) {
                Errors.LOG.error("Ooops!", new Exception());
            }
            if (index == last) {
                // the final child closes the chain at the right-hand node
                head.addIncoming(child);
            } else {
                // every other child ends in a fresh intermediate node
                tail = new VariantGraphNode<T>();
                tail.addIncoming(child);
            }
            index++;
        }
    }

    /**
     * Create an empty arc for attachment somewhere
     *
     * @param version the version of the empty arc
     * @return the empty unattached arc
     */
    private VariantGraphArc<T> createEmptyArc(Witness version) {
        // the arc belongs to the given version only and carries no data
        final Set<Witness> soleVersion = Sets.newHashSet(version);
        return new VariantGraphArc<T>(soleVersion, Lists.<T>newArrayList());
    }

    /**
     * The special arc needs to be split on the left
     *
     * @return an unaligned fragment of the original special arc
     *         not attached to any node
     */
    VariantGraphSpecialArc<T> splitOffLeftArc() {
        assert arc.parent == null && arc.children == null;
        // copy everything in front of the match offset into a list of its own
        final List<T> wholeArc = arc.getData();
        final List<T> unalignedPrefix = Lists.newArrayList(wholeArc.subList(0, match.dataOffset));
        // the fragment keeps the position of the original arc
        return new VariantGraphSpecialArc<T>(Sets.newHashSet(version), unalignedPrefix, arc.position);
    }

    /**
     * The special arc needs to be split on the right
     *
     * @return an unaligned fragment of the original special arc
     *         not attached to any node
     */
    VariantGraphSpecialArc<T> splitOffRightArc() {
        final List<T> wholeArc = arc.getData();
        // index of the first element behind the matched region
        final int sliceStart = match.dataOffset + match.length;
        final List<T> unalignedSuffix = Lists.newArrayList(wholeArc.subList(sliceStart, wholeArc.size()));
        // the fragment is positioned right after the matched region
        return new VariantGraphSpecialArc<T>(Sets.newHashSet(version), unalignedSuffix,
                arc.position + match.dataOffset + match.length);
    }

    /**
     * Create the right subgraph. This should only be called when
     * doing a direct alignment.
     *
     * The result is stored in {@code rightSubGraph}, which is set to null
     * when the match's right node is already the end of the graph.
     */
    private void createRightSubGraph() throws MVDException {
        final VariantGraphNode<T> matchRight = match.getRightNode();
        if (matchRight != graph.end) {
            // span from the match's right node to the end of the graph,
            // positioned just past the matched region of the special arc
            rightSubGraph = new VariantGraph<T>(matchRight, graph.end, getConstraint(matchRight, graph.end),
                    arc.position + match.dataOffset + match.length);
            return;
        }
        // the match's right node is the end of the graph: no right subgraph
        rightSubGraph = null;
    }

    /**
     * Create the left subgraph, whether or not we are doing a
     * transposition, it doesn't matter.
     */
    void createLeftSubGraph() throws MVDException {
        final VariantGraphNode<T> matchLeft = match.getLeftNode();
        if (matchLeft != graph.start) {
            // span from the start of the graph to the match's left node,
            // positioned where the special arc itself starts
            leftSubGraph = new VariantGraph<T>(graph.start, matchLeft, getConstraint(graph.start, matchLeft),
                    arc.position);
            return;
        }
        // the match's left node is the start of the graph: no left subgraph
        leftSubGraph = null;
    }

    /**
     * Calculate the constraint on moving infallibly between
     * two nodes in the graph
     *
     * @param start the first node of the new subgraph
     * @param end   the last node of the new subgraph
     * @return a set of versions shared by start and end
     */
    Set<Witness> getConstraint(VariantGraphNode<T> start, VariantGraphNode<T> end) {
        final Set<Witness> startVersions = start.getVersions();
        final Set<Witness> endVersions = end.getVersions();
        // copy the intersection view into an independent set
        return Sets.newHashSet(Sets.intersection(startVersions, endVersions));
    }

    /**
     * Is this MUM a transposition?
     *
     * @return true if we are transposed.
     */
    public boolean isTransposition() {
        // plain accessor for the transposed flag
        return this.transposed;
    }

    /**
     * This is used in TreeMap to order the special arc keys. Since we want them
     * sorted by decreasing length we return 1 if we are LESS than the other.
     *
     * @param other the other MUM to compare this to
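     * @return 1 if our match is shorter than other's, -1 if it is longer; on
     *         equal lengths 1 if only we are transposed, -1 if only other is,
     *         and 0 otherwise (also when other is null)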
     */
    public int compareTo(MaximalUniqueMatch<T> other) {
        // a null argument is treated as equal rather than rejected
        if (other == null) {
            return 0;
        }
        // longer matches sort first, hence the inverted sign
        final int ownLength = match.length;
        final int otherLength = other.match.length;
        if (ownLength != otherLength) {
            return ownLength < otherLength ? 1 : -1;
        }
        // same length: a direct alignment sorts before a transposition
        if (transposed == other.transposed) {
            return 0;
        }
        return transposed ? 1 : -1;
    }

    /**
     * Verify that the MUM can be merged
     *
     * @return if it's a direct alignment, true if all transpositions in the
     *         path don't contain a child with the MUM's version in it. If
     *         a transposition return true if no part of the path is direct
     *         aligned with the MUM's version. Otherwise return false.
     */
    public boolean verify() {
        // without a match there is nothing to merge; otherwise the match
        // checks its own path against this MUM's version
        return match != null && match.checkPath(version);
    }

    /**
     * Is there a path from one node back to another node by
     * following a particular version?
     *
     * @param from    the node from which to search backwards
     * @param to      the node to which to reach backwards
     * @param version the version path to follow
     * @return true if we can reach it
     */
    @SuppressWarnings("unused")
    private boolean canReachBackwards(VariantGraphNode<T> from, VariantGraphNode<T> to, Witness version) {
        while (from != null && from != to) {
            final VariantGraphArc<T> a = from.pickIncomingArc(version);
            // no incoming arc carries the version: the path ends before reaching 'to'
            // (previously this case ran into a NullPointerException)
            if (a == null) {
                return false;
            }
            // only arcs that belong to exactly one version are followed
            if (a.versions.size() != 1) {
                return false;
            }
            // step back to the node this arc leaves from
            from = a.from;
        }
        return from == to;
    }

    /**
     * Is there a path from one node forwards to another node by
     * following a particular version?
     *
     * @param from    the node from which to search forwards
     * @param to      the node to which to reach forwards
     * @param version the version path to follow
     * @return true if we can reach it
     */
    @SuppressWarnings("unused")
    private boolean canReachForwards(VariantGraphNode<T> from, VariantGraphNode<T> to, Witness version) {
        while (from != null && from != to) {
            final VariantGraphArc<T> a = from.pickOutgoingArc(version);
            // no outgoing arc carries the version: the path ends before reaching 'to'
            // (previously this case ran into a NullPointerException)
            if (a == null) {
                return false;
            }
            // only arcs that belong to exactly one version are followed
            if (a.versions.size() != 1) {
                return false;
            }
            // step forward to the node this arc leads to
            from = a.to;
        }
        return from == to;
    }
}