Source code

Java tutorial


Here is the source code for eu.interedition.collatex.nmerge.mvd.Variant.java


/*
 * NMerge is Copyright 2009-2011 Desmond Schmidt
 *
 * This file is part of NMerge. NMerge is a Java library for merging
 * multiple versions into multi-version documents (MVDs), and for
 * reading, searching and comparing them.
 *
 * NMerge is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package eu.interedition.collatex.nmerge.mvd;

import eu.interedition.collatex.Witness;
import eu.interedition.collatex.nmerge.exception.MVDException;

import java.util.List;
import java.util.Set;

/**
 * Represent a variant computed from a range in a base version
 * @author Desmond Schmidt 1/6/09
 */
public class Variant<T> implements Comparable<Variant<T>> {
    /** more than one version for a variant is possible */
    Set<Witness> versions;
    /** the start-index where it occurs within pairs */
    int startIndex;
    /** end index within pairs (used for isWithin) */
    int endIndex;
    /** the start-offset within start node */
    int startOffset;
    /** the length of the variant's real data, counted in tokens */
    int length;
    /** the mvd it is associated with */
    Collation<T> collation;
    /** the actual data of this variant */
    List<T> data;

    /**
     * Construct a variant
     * @param startOffset initial offset within startIndex
     * @param startIndex  the index within mvd of the first node
     * @param endIndex    the end index within the mvd's pairs (used for isWithin)
     * @param length      the length of the variant
     * @param versions    the set of versions over the variant
     * @param collation   the mvd it came from
     * @throws MVDException
     */
    public Variant(int startOffset, int startIndex, int endIndex, int length, Set<Witness> versions,
            Collation<T> collation) {
        this.startIndex = startIndex;
        this.endIndex = endIndex;
        this.collation = collation;
        this.versions = versions;
        this.length = length;
        this.startOffset = startOffset;

    /**
     * Get the version set
     * @return the set of versions (witnesses) over this variant
     */
    public Set<Witness> getVersions() {
        return versions;

    /**
     * Create a String representing the header part of a chunk
     * @return the header as a String including the trailing ':'
     */
    protected String createHeader() {
        StringBuffer sb = new StringBuffer();
        return sb.toString();

    /**
     * Convert to a string
     */
    public String toString() {
        String header = createHeader();
        StringBuffer sb = new StringBuffer();
        String dataStr = sb.toString();
        return header + dataStr + "]";

     * Test for equality. Versions don't matter. What we want is to find
     * out if two variants have the same text.
     * @param other the other variant to compare with this one
     * @return true if they are the same
    public boolean equals(Object other) {
        Variant<?> otherV = (Variant<?>) other;
        return this.versions.equals(otherV.versions) && this.startIndex == otherV.startIndex
                && this.endIndex == otherV.endIndex && this.startOffset == otherV.startOffset
                && this.collation == otherV.collation && this.equalsContent(otherV);

    /**
     * Are two variants equal in content, differing only in versions?
     * @param other the other variant to compare with
     * @return true if they are 'equal'
     */
    public boolean equalsContent(Variant<?> other) {
        if (!this.collation.equals(other.collation)) {
            return false;
        } else {

    /**
     * Generate a hash of the content of this Variant. It should be almost
     * unique. It will be used to collect together and wipe out any variants
     * generated during the getApparatus method that are identical.
     * @return a hash computed from startIndex, startOffset, versions and data
     */
    public int hashCode() {
        return Objects.hashCode(startIndex, startOffset, versions, data);

    /**
     * Generate content by following the paths of the variant
     * in the MVD.
     */
    private void findContent() {
        data = Lists.newArrayList();
        int iNode = startIndex;
        Match<T> p = collation.getMatches().get(iNode);
        int i = startOffset;
        int totalLen = 0;
        while (p.length() == 0 || totalLen < this.length) {
            if (p.length() == 0 || i == p.length()) {
                iNode = + 1, Iterables.getFirst(versions, null));
                p = collation.getMatches().get(iNode);
                i = 0;
            } else {
                totalLen += p.getTokens().size();

    /**
     * Merge two variants equal in content
     * @param other the other variant to merge with this one.
     */
    public void merge(Variant<T> other) {

    /**
     * Is this variant entirely contained within another variant?
     * We just check if we are within the bounds of the other variant.
     * No need to compare the text of the two variants - the versions
     * must be the same, so within the bounds means that the same text
     * will occur.
     * @param other the other variant to compare it to
     * @return true if we are within other, false otherwise
     */
    public boolean isWithin(Variant<T> other) {
        // these tests will mostly fail
        // so we can avoid the main computation
        if (length < other.length && startIndex >= other.startIndex && endIndex <= other.endIndex
                && this.versions.equals(other.versions)) {
            // another quick test to shortcut the computation
            if (startIndex == other.startIndex && (startOffset < other.startOffset
                    || (startOffset - other.startOffset) + length > other.length)) {
                return false;
            } else {
                // OK, we have some work to do ...
                // find the start of this variant in other
                int offset = other.startOffset;
                int index = other.startIndex;
                Match<T> p = collation.getMatches().get(index);
                int i = 0;
                Witness followV = Iterables.getFirst(versions, null);
                while (i < other.length) {
                    if (offset == p.length()) {
                        index = + 1, followV);
                        p = collation.getMatches().get(index);
                        offset = 0;
                    } else {
                    // found start?
                    if (index == startIndex && offset == startOffset) {
                        return other.length - i >= length;
        return false;

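    /**
     * Compare two Variants. Try to short-circuit the
     * comparison to reduce computation: start index, start offset and
     * length are compared first, then the joined versions, then the data.
     * @param other the variant to compare ourselves to
     * @return a negative number, zero or a positive number as this variant
     * sorts before, equal to or after other
     */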
    public int compareTo(Variant<T> other) {
        if (this.startIndex < other.startIndex) {
            return -1;
        } else if (this.startIndex > other.startIndex) {
            return 1;
        } else if (this.startOffset < other.startOffset) {
            return -1;
        } else if (this.startOffset > other.startOffset) {
            return 1;
        } else if (this.length < other.length) {
            return -1;
        } else if (this.length > other.length) {
            return 1;
        } else {
            // FIXME: What is a proper ordering of witness sets?
            final Joiner joiner = Joiner.on(',');
            String thisV = joiner.join(versions);
            String thatV = joiner.join(other.versions);
            int res = thisV.compareTo(thatV);
            if (res != 0) {
                return res;
            } else {
                // FIXME: introduce token comparator to do a proper comparison
                return Iterables.toString(data).compareTo(Iterables.toString(;