au.edu.uq.nmerge.mvd.KMPSearchState.java Source code

Java tutorial

Introduction

Here is the source code for au.edu.uq.nmerge.mvd.KMPSearchState.java

Source

/*
 * NMerge is Copyright 2009-2011 Desmond Schmidt
 *
 * This file is part of NMerge. NMerge is a Java library for merging
 * multiple versions into multi-version documents (MVDs), and for
 * reading, searching and comparing them.
 *
 * NMerge is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package au.edu.uq.nmerge.mvd;

import au.edu.uq.nmerge.exception.MVDException;
import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;

import java.util.List;
import java.util.Set;

/**
 * This version of search uses Knuth-Morris-Pratt search
 * algorithm. Not very fast perhaps but reliable. I am sick
 * of problems with Karp-Rabin search. The hash keeps overflowing.
 * <p>
 * Each instance tracks how far into the pattern a match has progressed
 * for one set of witnesses. Instances can be chained into a
 * singly-linked list through {@link #following}.
 *
 * @param <T> the type of the elements being matched; elements are
 *            compared with the supplied {@link Ordering}
 * @author Desmond Schmidt
 */
public class KMPSearchState<T> {
    /**
     * decides whether two elements are equal (compare result of 0)
     */
    private final Ordering<T> ordering;
    /**
     * the pattern being searched for
     */
    List<T> pattern;
    /**
     * the witnesses (versions) this search state stands for
     */
    Set<Witness> v;
    /**
     * the next search state in the list, or null at the end of the list
     */
    KMPSearchState<T> following;
    /**
     * KMP failure table: for a mismatch at pattern index i, next[i] is the
     * pattern index from which matching resumes
     */
    int[] next;
    /**
     * index in the pattern of the next element to match
     */
    int pos;

    /**
     * Initialisation is easy.
     *
     * @param ordering the ordering used to compare elements for equality
     * @param pattern  the pattern to search for; must not be empty
     * @param v        the witnesses this state applies to; the set is stored
     *                 by reference (not copied) and is modified by
     *                 {@link #merge(KMPSearchState)}
     * @throws IllegalArgumentException if the pattern is empty
     */
    public KMPSearchState(Ordering<T> ordering, List<T> pattern, Set<Witness> v) {
        // an empty pattern has no failure table and nothing to match against
        Preconditions.checkArgument(!pattern.isEmpty(), "pattern must not be empty");
        this.ordering = ordering;
        this.v = v;
        this.pattern = pattern;
        next = initNext(ordering, pattern);
    }

    /**
     * Constructor for cloning this object - useful for split.
     * The ordering, pattern and current position are taken from the
     * given state; the failure table and the witness set are copied.
     *
     * @param ss the SearchState object to clone
     * @param v  the witnesses of the new state (copied into a fresh set)
     */
    private KMPSearchState(KMPSearchState<T> ss, Set<Witness> v) {
        this.ordering = ss.ordering;
        this.pattern = ss.pattern;
        this.v = Sets.newHashSet(v);
        this.pos = ss.pos;
        this.next = ss.next.clone();
    }

    /**
     * Initialise the next table
     *
     * @param ordering the ordering used to compare pattern elements
     * @param pattern  the (non-empty) pattern
     * @return an array of next indices, one per pattern element
     */
    private static <T> int[] initNext(Ordering<T> ordering, List<T> pattern) {
        int[] next = new int[pattern.size()];
        int i = 0, j = -1;
        // -1 is only a sentinel while the table is being built
        next[0] = -1;
        while (i < pattern.size() - 1) {
            while (j >= 0 && ordering.compare(pattern.get(i), pattern.get(j)) != 0) {
                j = next[j];
            }
            i++;
            j++;
            next[i] = j;
        }
        // replace the sentinel so that next[] only holds valid pattern indices
        next[0] = 0;
        return next;
    }

    /**
     * Concatenate a list of SearchState objects to the end of our list.
     *
     * @param list a list of SearchState objects
     */
    void append(KMPSearchState<T> list) {
        KMPSearchState<T> temp = this;
        while (temp.following != null) {
            temp = temp.following;
        }
        temp.following = list;
    }

    /**
     * Override of the Object method. Two search states are considered
     * equal when they have reached the same position in the pattern;
     * their witness sets are not compared.
     *
     * @param obj another SearchState object to compare with
     * @return true if obj is a search state at the same position,
     *         false otherwise (including for null or foreign types)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof KMPSearchState<?>)) {
            return false;
        }
        return ((KMPSearchState<?>) obj).pos == pos;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}. It changes
     * whenever the state advances, because it is derived from the
     * mutable position.
     *
     * @return the current position in the pattern
     */
    @Override
    public int hashCode() {
        return pos;
    }

    /**
     * Are this object's versions a subset of those given?
     *
     * @param v the witness set of which we might be a subset
     * @return true if every one of our witnesses is contained in v
     */
    boolean isSubset(Set<Witness> v) {
        return v.containsAll(this.v);
    }

    /**
     * Combine the versions of the given search state with ours.
     *
     * @param s the search state object to merge with this one
     */
    void merge(KMPSearchState<T> s) {
        this.v.addAll(s.v);
    }

    /**
     * Remove a SearchState object from the list of which we are a part.
     * The object must be in the list FROM the point at which we are at
     * (because we are not doubly-linked).
     *
     * @param item the list item to remove
     * @return the list with the item removed (may be null)
     * @throws MVDException if the item is not in the list starting here
     */
    KMPSearchState<T> remove(KMPSearchState<T> item) throws MVDException {
        KMPSearchState<T> previous, list, temp;
        previous = temp = list = this;
        while (temp != null && temp != item) {
            previous = temp;
            temp = temp.following;
        }
        if (previous == temp) // it matched immediately
        {
            list = temp.following; // could be null!
            temp.following = null;
        } else if (temp == null) // it didn't find it!
        {
            throw new MVDException("List item not found");
        } else // temp in the middle of the list
        {
            previous.following = temp.following;
            temp.following = null;
        }
        return list;
    }

    /**
     * Split off a clone of ourselves intersecting with bs as its set of
     * versions. Should only be called after this.v.intersects(bs) has
     * returned true.
     *
     * @param bs the set which must intersect with our versions.
     * @return a clone of this state whose witnesses are the intersection
     *         of ours with bs
     * @throws IllegalArgumentException if the intersection is empty
     */
    KMPSearchState<T> split(Set<Witness> bs) {
        final Sets.SetView<Witness> intersection = Sets.intersection(this.v, bs);
        Preconditions.checkArgument(!intersection.isEmpty());
        // the cloning constructor copies the view into an independent set
        return new KMPSearchState<T>(this, intersection);
    }

    /**
     * Update the search state with a new element of the text.
     * <p>
     * On a mismatch the state falls back through the next table and the
     * same element is compared again at each fallback position; only
     * when it also fails at position 0 is the element given up on.
     * Without that re-comparison, partially overlapping matches are
     * missed (e.g. pattern "aab" in text "aaab").
     * After a complete match the position is reset to 0.
     *
     * @param c the element from the text to update with
     * @return true if this element completes a match, false otherwise
     */
    boolean update(T c) {
        while (ordering.compare(pattern.get(pos), c) != 0) {
            if (pos == 0) {
                // no prefix of the pattern ends with c
                return false;
            }
            // next[pos] < pos for pos > 0, so this loop terminates
            pos = next[pos];
        }
        // we have a match at pos
        pos++;
        if (pos == pattern.size()) {
            pos = 0;
            return true;
        }
        return false;
    }
}