edu.stanford.nlp.ling.WordTag.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.ling.WordTag.java

Source

package edu.stanford.nlp.ling;

import java.io.DataInputStream;
import java.io.DataOutputStream;

/**
 * A WordTag corresponds to a tagged (e.g., for part of speech) word
 * and is implemented with String-valued word and tag.  It implements
 * the Label interface; the {@code value()} method for that
 * interface corresponds to the word of the WordTag.
 * <p>
 * The equality relation for WordTag is defined as identity of both
 * word and tag.  Note that this is different from
 * {@code TaggedWord}, for which equality derives from
 * {@code ValueLabel} and requires only identity of value.
 *
 * @author Roger Levy
 */

public class WordTag implements Label, HasWord, HasTag, Comparable<WordTag> {

    /** Separator used between word and tag when no explicit divider is given. */
    private static final String DIVIDER = "/";

    private static final long serialVersionUID = -1859527239216813742L;

    private String word;
    private String tag;

    /**
     * Create a new {@code WordTag} from a word and a tag.
     *
     * @param word The word; this is also what {@code value()} returns
     * @param tag  The tag attached to the word (may be {@code null})
     */
    public WordTag(String word, String tag) {
        setWord(word);
        setTag(tag);
    }

    /**
     * Create a new {@code WordTag} with the given word and a {@code null} tag.
     *
     * @param word The word; this is also what {@code value()} returns
     */
    public WordTag(String word) {
        this(word, null);
    }

    /**
     * Create a new {@code WordTag} from a single object that is both a
     * Label and carries a tag.
     *
     * @param word Its {@code value()} becomes the word and its
     *             {@code tag()} becomes the tag of the {@code WordTag}
     */
    public <E extends Label & HasTag> WordTag(E word) {
        this(word.value(), word.tag());
    }

    // only used internally for doing setFromString()
    private WordTag() {
    }

    /**
     * Create a new {@code WordTag} from a Label.  The value of
     * the Label corresponds to the word of the WordTag.
     *
     * @param word The {@code value()} of this label is set as the
     *             word of the {@code WordTag}
     * @param tag  The {@code value()} of this label is set as the
     *             tag of the {@code WordTag}
     */
    public WordTag(Label word, Label tag) {
        this(word.value(), tag.value());
    }

    /**
     * Build a {@code WordTag} by decoding a string with the default
     * divider ("/"); see {@link #setFromString(String)}.
     *
     * @param s The string to decode
     * @return A new {@code WordTag} holding the decoded word and tag
     */
    public static WordTag valueOf(String s) {
        WordTag result = new WordTag();
        result.setFromString(s);
        return result;
    }

    /**
     * Build a {@code WordTag} by decoding a string with an explicit
     * divider; see {@link #setFromString(String, String)}.
     *
     * @param s          The string to decode
     * @param tagDivider The divider separating word from tag
     * @return A new {@code WordTag} holding the decoded word and tag
     */
    public static WordTag valueOf(String s, String tagDivider) {
        WordTag result = new WordTag();
        result.setFromString(s, tagDivider);
        return result;
    }

    /**
     * Return a String representation of just the "main" value of this label,
     * which for a {@code WordTag} is the word.
     *
     * @return the "value" of the label
     */
    public String value() {
        return word;
    }

    /**
     * Return the word; identical to {@link #value()}.
     *
     * @return the word of this {@code WordTag}
     */
    public String word() {
        return value();
    }

    /**
     * Set the value for the label, i.e. the word.
     *
     * @param value - the value for the label
     */
    public void setValue(String value) {
        word = value;
    }

    /**
     * Return the tag.
     *
     * @return the tag of this {@code WordTag}, or {@code null} if none is set
     */
    public String tag() {
        return tag;
    }

    /**
     * Set the word; delegates to {@link #setValue(String)}.
     *
     * @param word the new word
     */
    public void setWord(String word) {
        setValue(word);
    }

    /**
     * Set the tag.
     *
     * @param tag the new tag (may be {@code null})
     */
    public void setTag(String tag) {
        this.tag = tag;
    }

    /**
     * Return a String representation of the label.  For a multipart label,
     * this will return all parts.  The {@code toString()} method
     * causes a label to spill its guts.  It always returns an
     * empty string rather than {@code null} if there is no value.
     *
     * @return a text representation of the full label contents
     */
    @Override
    public String toString() {
        return toString(DIVIDER);
    }

    /**
     * Return the word alone when there is no tag, otherwise the word,
     * the divider and the tag concatenated in that order.
     *
     * @param divider The text to place between word and tag
     * @return a text representation of the label; never {@code null}
     */
    public String toString(String divider) {
        String currentWord = word();
        String currentTag = tag();
        if (currentTag == null) {
            // Honour the documented contract: no value means "", not null.
            return currentWord == null ? "" : currentWord;
        }
        return currentWord + divider + currentTag;
    }

    /**
     * Sets a WordTag from decoding
     * the {@code String} passed in.  The String is divided according
     * to the default divider ("/").  We assume that we can
     * always just
     * divide on the rightmost divider, rather than trying to
     * parse up escape sequences.  If the divider isn't found
     * in the string, then the whole string becomes the word, and the tag
     * is {@code null}.
     *
     * @param wordTagString The string to decode into word and tag
     */
    @Override
    public void setFromString(String wordTagString) {
        setFromString(wordTagString, DIVIDER);
    }

    /**
     * Sets this WordTag from a string, splitting on the rightmost
     * occurrence of {@code divider}.  Everything before the divider becomes
     * the word and everything after it becomes the tag; both are interned.
     * If the divider does not occur (or is empty), the whole string becomes
     * the word and the tag is {@code null}.
     *
     * @param wordTagString The string to decode into word and tag
     * @param divider       The text separating word from tag
     */
    public void setFromString(String wordTagString, String divider) {
        // An empty divider "matches" at the end of every string; treat it as absent.
        int where = divider.isEmpty() ? -1 : wordTagString.lastIndexOf(divider);
        if (where >= 0) {
            setWord(wordTagString.substring(0, where).intern());
            // Skip the whole divider, not just one character, so that
            // multi-character dividers do not leak into the tag.
            setTag(wordTagString.substring(where + divider.length()).intern());
        } else {
            setWord(wordTagString.intern());
            setTag(null);
        }
    }

    /** A WordTag is equal only to another WordTag with the same word and tag values.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof WordTag)) {
            return false;
        }
        final WordTag other = (WordTag) o;
        return sameOrBothNull(tag, other.tag) && sameOrBothNull(word, other.word);
    }

    /** Null-tolerant string equality: two nulls are equal, null never equals non-null. */
    private static boolean sameOrBothNull(String a, String b) {
        return a == null ? b == null : a.equals(b);
    }

    @Override
    public int hashCode() {
        int result;
        result = (word != null ? word.hashCode() : 0);
        result = 29 * result + (tag != null ? tag.hashCode() : 0);
        return result;
    }

    /**
     * Orders first by word, then by tag.  A {@code null} word or tag
     * sorts before any non-null one, so the ordering is total over all
     * field combinations and returns 0 exactly when {@code equals} is true.
     *
     * @param wordTag object to compare to
     * @return result (positive if {@code this} is greater than
     *         {@code wordTag}, 0 if equal, negative otherwise)
     */
    @Override
    public int compareTo(WordTag wordTag) {
        int byWord = compareNullsFirst(word(), wordTag.word());
        if (byWord != 0) {
            return byWord;
        }
        return compareNullsFirst(tag(), wordTag.tag());
    }

    /** Compares two strings naturally, placing {@code null} before every non-null value. */
    private static int compareNullsFirst(String a, String b) {
        if (a == null) {
            return b == null ? 0 : -1;
        }
        if (b == null) {
            return 1;
        }
        return a.compareTo(b);
    }

    // extra class guarantees correct lazy loading (Bloch p.194)
    private static class LabelFactoryHolder {

        private static final LabelFactory lf = new WordTagFactory();

    }

    /**
     * Return a factory for this kind of label
     * (i.e., {@code WordTag}).
     * The factory returned is always the same one (a singleton).
     *
     * @return The label factory
     */
    public LabelFactory labelFactory() {
        return LabelFactoryHolder.lf;
    }

    /**
     * Return a factory for this kind of label.
     *
     * @return The label factory
     */
    public static LabelFactory factory() {
        return LabelFactoryHolder.lf;
    }

    /**
     * Reads the word and then the tag from the stream, each via
     * {@code readUTF()}.  Any exception is caught and its stack trace is
     * printed; nothing is propagated to the caller.
     *
     * @param in The stream to read from
     */
    public void read(DataInputStream in) {
        try {
            word = in.readUTF();
            tag = in.readUTF();

        } catch (Exception e) {
            // Best-effort: failures are reported on stderr only.  If reading
            // the tag fails, the word has already been overwritten.
            e.printStackTrace();
        }
    }

    /**
     * Writes the word and then the tag to the stream, each via
     * {@code writeUTF()}.  Any exception is caught and its stack trace is
     * printed; nothing is propagated to the caller.
     *
     * @param out The stream to write to
     */
    public void save(DataOutputStream out) {
        try {
            out.writeUTF(word);
            out.writeUTF(tag);
        } catch (Exception e) {
            // NOTE(review): writeUTF rejects null, so a WordTag with a null
            // tag ends up here after the word was already written.  Left
            // as-is to keep the on-disk format and error behaviour unchanged.
            e.printStackTrace();
        }
    }

}