opennlp.tools.util.Span.java Source code

Java tutorial

Introduction

Here is the source code for opennlp.tools.util.Span.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.util;

import java.io.Serializable;
import java.util.Objects;

/**
 * Class for storing start and end integer offsets.
 *
 */
public class Span implements Comparable<Span>, Serializable {

    private final int start;
    private final int end;
    private final double prob;//default is 0
    private final String type;

    /**
     * Initializes a new Span Object. Sets the prob to 0 as default.
     *
     * @param s start of span.
     * @param e end of span, which is +1 more than the last element in the span.
     * @param type the type of the span
     */
    public Span(int s, int e, String type) {
        this(s, e, type, 0d);
    }

    /**
     * Initializes a new Span Object.
     *
     * @param s start of span.
     * @param e end of span, which is +1 more than the last element in the span.
     * @param type the type of the span
     * @param prob probability of span.
     */
    public Span(int s, int e, String type, double prob) {

        if (s < 0) {
            throw new IllegalArgumentException("start index must be zero or greater: " + s);
        }
        if (e < 0) {
            throw new IllegalArgumentException("end index must be zero or greater: " + e);
        }
        if (s > e) {
            throw new IllegalArgumentException(
                    "start index must not be larger than end index: " + "start=" + s + ", end=" + e);
        }

        start = s;
        end = e;
        this.prob = prob;
        this.type = type;
    }

    /**
     * Initializes a new Span Object. Sets the prob to 0 as default
     *
     * @param s start of span.
     * @param e end of span.
     */
    public Span(int s, int e) {
        this(s, e, null, 0d);
    }

    /**
     *
     * @param s the start of the span (the token index, not the char index)
     * @param e the end of the span (the token index, not the char index)
     * @param prob
     */
    public Span(int s, int e, double prob) {
        this(s, e, null, prob);
    }

    /**
     * Initializes a new Span object with an existing Span which is shifted by an
     * offset.
     *
     * @param span
     * @param offset
     */
    public Span(Span span, int offset) {
        this(span.start + offset, span.end + offset, span.getType(), span.getProb());
    }

    /**
     * Creates a new immutable span based on an existing span, where the existing span did not include the prob
     * @param span the span that has no prob or the prob is incorrect and a new Span must be generated
     * @param prob the probability of the span
     */
    public Span(Span span, double prob) {
        this(span.start, span.end, span.getType(), prob);
    }

    /**
     * Return the start of a span.
     *
     * @return the start of a span.
     *
     */
    public int getStart() {
        return start;
    }

    /**
     * Return the end of a span.
     *
     * Note: that the returned index is one past the actual end of the span in the
     * text, or the first element past the end of the span.
     *
     * @return the end of a span.
     *
     */
    public int getEnd() {
        return end;
    }

    /**
     * Retrieves the type of the span.
     *
     * @return the type or null if not set
     */
    public String getType() {
        return type;
    }

    /**
     * Returns the length of this span.
     *
     * @return the length of the span.
     */
    public int length() {
        return end - start;
    }

    /**
     * Returns true if the specified span is contained by this span. Identical
     * spans are considered to contain each other.
     *
     * @param s The span to compare with this span.
     *
     * @return true is the specified span is contained by this span; false otherwise.
     */
    public boolean contains(Span s) {
        return start <= s.getStart() && s.getEnd() <= end;
    }

    /**
     * Returns true if the specified index is contained inside this span. An index
     * with the value of end is considered outside the span.
     *
     * @param index the index to test with this span.
     *
     * @return true if the span contains this specified index; false otherwise.
     */
    public boolean contains(int index) {
        return start <= index && index < end;
    }

    /**
     * Returns true if the specified span is the begin of this span and the
     * specified span is contained in this span.
     *
     * @param s The span to compare with this span.
     *
     * @return true if the specified span starts with this span and is contained
     *     in this span; false otherwise
     */
    public boolean startsWith(Span s) {
        return getStart() == s.getStart() && contains(s);
    }

    /**
     * Returns true if the specified span intersects with this span.
     *
     * @param s The span to compare with this span.
     *
     * @return true is the spans overlap; false otherwise.
     */
    public boolean intersects(Span s) {
        int sstart = s.getStart();
        //either s's start is in this or this' start is in s
        return this.contains(s) || s.contains(this) || getStart() <= sstart && sstart < getEnd()
                || sstart <= getStart() && getStart() < s.getEnd();
    }

    /**
     * Returns true is the specified span crosses this span.
     *
     * @param s The span to compare with this span.
     *
     * @return true is the specified span overlaps this span and contains a
     *     non-overlapping section; false otherwise.
     */
    public boolean crosses(Span s) {
        int sstart = s.getStart();
        //either s's start is in this or this' start is in s
        return !this.contains(s) && !s.contains(this)
                && (getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd());
    }

    /**
     * Retrieves the string covered by the current span of the specified text.
     *
     * @param text
     *
     * @return the substring covered by the current span
     */
    public CharSequence getCoveredText(CharSequence text) {
        if (getEnd() > text.length()) {
            throw new IllegalArgumentException("The span " + toString()
                    + " is outside the given text which has length " + text.length() + "!");
        }

        return text.subSequence(getStart(), getEnd());
    }

    /**
     * Return a copy of this span with leading and trailing white spaces removed.
     *
     * @param text
     *
     * @return the trimmed span or the same object if already trimmed
     */
    public Span trim(CharSequence text) {

        int newStartOffset = getStart();

        for (int i = getStart(); i < getEnd() && StringUtil.isWhitespace(text.charAt(i)); i++) {
            newStartOffset++;
        }

        int newEndOffset = getEnd();
        for (int i = getEnd(); i > getStart() && StringUtil.isWhitespace(text.charAt(i - 1)); i--) {
            newEndOffset--;
        }

        if (newStartOffset == getStart() && newEndOffset == getEnd()) {
            return this;
        } else if (newStartOffset > newEndOffset) {
            return new Span(getStart(), getStart(), getType());
        } else {
            return new Span(newStartOffset, newEndOffset, getType());
        }
    }

    /**
     * Compares the specified span to the current span.
     */
    public int compareTo(Span s) {
        if (getStart() < s.getStart()) {
            return -1;
        } else if (getStart() == s.getStart()) {
            if (getEnd() > s.getEnd()) {
                return -1;
            } else if (getEnd() < s.getEnd()) {
                return 1;
            } else {
                // compare the type
                if (getType() == null && s.getType() == null) {
                    return 0;
                } else if (getType() != null && s.getType() != null) {
                    // use type lexicography order
                    return getType().compareTo(s.getType());
                } else if (getType() != null) {
                    return -1;
                }
                return 1;
            }
        } else {
            return 1;
        }
    }

    /**
     * Generates a hash code of the current span.
     */
    @Override
    public int hashCode() {
        return Objects.hash(getStart(), getEnd(), getType());
    }

    /**
     * Checks if the specified span is equal to the current span.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }

        if (o instanceof Span) {
            Span s = (Span) o;

            return getStart() == s.getStart() && getEnd() == s.getEnd() && Objects.equals(getType(), s.getType());
        }

        return false;
    }

    /**
     * Generates a human readable string.
     */
    @Override
    public String toString() {
        StringBuilder toStringBuffer = new StringBuilder(15);
        toStringBuffer.append("[");
        toStringBuffer.append(getStart());
        toStringBuffer.append("..");
        toStringBuffer.append(getEnd());
        toStringBuffer.append(")");
        if (getType() != null) {
            toStringBuffer.append(" ");
            toStringBuffer.append(getType());
        }

        return toStringBuffer.toString();
    }

    /**
     * Converts an array of {@link Span}s to an array of {@link String}s.
     *
     * @param spans
     * @param s
     * @return the strings
     */
    public static String[] spansToStrings(Span[] spans, CharSequence s) {
        String[] tokens = new String[spans.length];

        for (int si = 0, sl = spans.length; si < sl; si++) {
            tokens[si] = spans[si].getCoveredText(s).toString();
        }

        return tokens;
    }

    public static String[] spansToStrings(Span[] spans, String[] tokens) {
        String[] chunks = new String[spans.length];
        StringBuilder cb = new StringBuilder();
        for (int si = 0, sl = spans.length; si < sl; si++) {
            cb.setLength(0);
            for (int ti = spans[si].getStart(); ti < spans[si].getEnd(); ti++) {
                cb.append(tokens[ti]).append(" ");
            }
            chunks[si] = cb.substring(0, cb.length() - 1);
        }
        return chunks;
    }

    public double getProb() {
        return prob;
    }

}