edu.stanford.nlp.ling.WordLemmaTag.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.ling.WordLemmaTag.java

Source

package edu.stanford.nlp.ling;

import edu.stanford.nlp.process.Morphology;
import java.util.Objects;

/**
 * A WordLemmaTag corresponds to a pair of a tagged (e.g., for part of speech)
 * word and its lemma. WordLemmaTag is implemented with String-valued word,
 * lemma and tag.
 * It implements the Label interface; the {@code value()} method for that
 * interface corresponds to the word of the WordLemmaTag.
 * <p>
 * The equality relation for WordLemmaTag is defined as identity of
 * word, lemma and tag.
 *
 * @author Marie-Catherine de Marneffe
 */
public class WordLemmaTag implements Label, Comparable<WordLemmaTag>, HasWord, HasTag {

    private String word;
    private String lemma;
    private String tag;
    private static final String DIVIDER = "/";

    /**
     * Create a new {@code WordLemmaTag} that holds only a word.
     * The lemma and the tag are left {@code null}.
     *
     * @param word The word of this Label
     */
    public WordLemmaTag(String word) {
        this.word = word;
        this.lemma = null;
        setTag(null);
    }

    /**
     * Create a new {@code WordLemmaTag} whose word is the {@code value()}
     * of the given Label. The lemma and the tag are left {@code null}.
     *
     * @param word The Label whose {@code value()} becomes the word
     */
    public WordLemmaTag(Label word) {
        this(word.value());
    }

    /**
     * Create an empty {@code WordLemmaTag}: word, lemma and tag are all
     * {@code null} until set.
     */
    public WordLemmaTag() {
    }

    /**
     * Create a new {@code WordLemmaTag}. The lemma is computed from the
     * word and the tag via {@code Morphology.stemStatic}.
     *
     * @param word This word is set as the word of this Label
     * @param tag  This tag is set as the tag of this Label
     */
    public WordLemmaTag(String word, String tag) {
        WordTag wT = new WordTag(word, tag);
        this.word = word;
        this.lemma = Morphology.stemStatic(wT).word();
        setTag(tag);
    }

    /**
     * Create a new {@code WordLemmaTag} from explicit parts; nothing is
     * computed.
     *
     * @param word  This word is set as the word of this Label
     * @param lemma The lemma is set as the lemma of this Label
     * @param tag   This tag is set as the tag of this Label
     */
    public WordLemmaTag(String word, String lemma, String tag) {
        this(word);
        this.lemma = lemma;
        setTag(tag);
    }

    /**
     * Create a new {@code WordLemmaTag} from two Labels. The value of
     * the first Label corresponds to the word of the WordLemmaTag, the value
     * of the second one to its tag. The lemma is computed from both via
     * {@code Morphology.stemStatic}.
     *
     * @param word The Label whose {@code value()} becomes the word
     * @param tag  The Label whose {@code value()} becomes the tag
     */
    public WordLemmaTag(Label word, Label tag) {
        this(word);
        WordTag wT = new WordTag(word, tag);
        this.lemma = Morphology.stemStatic(wT).word();
        setTag(tag.value());
    }

    /**
     * Return a String representation of just the "main" value of this Label,
     * which is the word.
     *
     * @return the "value" of the Label (may be {@code null} if never set)
     */
    @Override
    public String value() {
        return word;
    }

    /**
     * Return the word of this Label; same as {@link #value()}.
     *
     * @return the word (may be {@code null} if never set)
     */
    @Override
    public String word() {
        return value();
    }

    /**
     * Set the value for the Label, i.e. its word.
     *
     * @param value the value for the Label
     */
    @Override
    public void setValue(String value) {
        word = value;
    }

    /**
     * Set the word of this Label; same as {@link #setValue(String)}.
     *
     * @param word the new word
     */
    @Override
    public void setWord(String word) {
        setValue(word);
    }

    /**
     * Set the lemma of this Label.
     *
     * @param lemma the new lemma
     */
    public void setLemma(String lemma) {
        this.lemma = lemma;
    }

    /**
     * Set the tag for the Label. Declared {@code final} because it is
     * called from the constructors.
     *
     * @param tag the tag for the Label
     */
    @Override
    public final void setTag(String tag) {
        this.tag = tag;
    }

    /**
     * Return the tag of this Label.
     *
     * @return the tag (may be {@code null})
     */
    @Override
    public String tag() {
        return tag;
    }

    /**
     * Return the lemma of this Label.
     *
     * @return the lemma (may be {@code null})
     */
    public String lemma() {
        return lemma;
    }

    /**
     * Return a String representation of the Label.  For a multipart Label,
     * this will return all parts.
     *
     * @return a text representation of the full label contents: word/lemma/tag
     */
    @Override
    public String toString() {
        return toString(DIVIDER);
    }

    /**
     * Return the word, lemma and tag joined by the given divider.
     * A {@code null} part is rendered as the text {@code "null"}.
     *
     * @param divider the separator placed between the three parts
     * @return word, lemma and tag separated by {@code divider}
     */
    public String toString(String divider) {
        return word() + divider + lemma + divider + tag;
    }

    /**
     * The String is divided according to the divider character (usually, "/").
     * We assume that we can always just divide on the rightmost divider character,
     * rather than trying to parse up escape sequences.  If the divider character isn't found
     * in the word, then the whole string becomes the word, and lemma and tag
     * are {@code null}.
     * We assume that if only one divider character is found, word and tag are present in
     * the String, and lemma will be computed.
     *
     * @param labelStr The word that will go into the {@code WordLemmaTag}
     */
    @Override
    public void setFromString(String labelStr) {
        setFromString(labelStr, DIVIDER);
    }

    /**
     * Populate word, lemma and tag from a delimited String.
     * <ul>
     * <li>No divider: the whole string becomes the word; lemma and tag are
     *     set to {@code null}.</li>
     * <li>One divider ({@code word/tag}): word and tag are taken from the
     *     string and the lemma is computed with
     *     {@code Morphology.lemmaStatic}.</li>
     * <li>Two or more dividers ({@code word/lemma/tag}): the word is
     *     everything before the first divider, the tag everything after the
     *     last one, and the lemma whatever lies in between.</li>
     * </ul>
     *
     * @param labelStr the String to split; must not be {@code null}
     * @param divider  the separator between the parts; expected to be non-empty
     */
    public void setFromString(String labelStr, String divider) {
        int first = labelStr.indexOf(divider);
        if (first < 0) {
            // Must be tested before comparing first and last occurrence:
            // both are -1 here and would otherwise look like "one divider".
            setWord(labelStr);
            setLemma(null);
            setTag(null);
            return;
        }

        int dividerLength = divider.length();
        int afterFirst = first + dividerLength;
        int last = labelStr.lastIndexOf(divider);

        if (last < afterFirst) {
            // Exactly one divider. This also covers a multi-character divider
            // whose first and last occurrences overlap, where a separate
            // lemma segment cannot be cut out.
            String wordPart = labelStr.substring(0, first);
            String tagPart = labelStr.substring(afterFirst);
            setWord(wordPart);
            setTag(tagPart);
            setLemma(Morphology.lemmaStatic(wordPart, tagPart));
        } else {
            setWord(labelStr.substring(0, first));
            setLemma(labelStr.substring(afterFirst, last));
            setTag(labelStr.substring(last + dividerLength));
        }
    }

    /**
     * Equality is satisfied only if the compared object is a WordLemmaTag
     * and has String-equal word, lemma and tag fields. Two {@code null}
     * fields count as equal, matching how {@link #hashCode()} treats them.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof WordLemmaTag)) {
            return false;
        }

        final WordLemmaTag other = (WordLemmaTag) o;
        return Objects.equals(word(), other.word())
                && Objects.equals(lemma(), other.lemma())
                && Objects.equals(tag(), other.tag());
    }

    /**
     * Hash code built from word, tag and lemma; {@code null} fields are
     * allowed.
     */
    @Override
    public int hashCode() {
        int result;
        result = (word != null ? word.hashCode() : 3);
        result = 29 * result + (tag != null ? tag.hashCode() : 0);
        result = 29 * result + (lemma != null ? lemma.hashCode() : 0);
        return result;
    }

    /**
     * Orders first by word, then by lemma, then by tag. A {@code null}
     * component sorts before any non-null one, and two {@code null}
     * components compare as equal, so the ordering is consistent with
     * {@link #equals(Object)}.
     *
     * @param wordLemmaTag object to compare to
     * @return result (positive if {@code this} is greater than
     *         {@code obj}, 0 if equal, negative otherwise)
     */
    @Override
    public int compareTo(WordLemmaTag wordLemmaTag) {
        int byWord = compareNullable(word(), wordLemmaTag.word());
        if (byWord != 0) {
            return byWord;
        }
        int byLemma = compareNullable(lemma(), wordLemmaTag.lemma());
        if (byLemma != 0) {
            return byLemma;
        }
        return compareNullable(tag(), wordLemmaTag.tag());
    }

    /** Compares two Strings naturally, placing {@code null} before any non-null value. */
    private static int compareNullable(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }

    /**
     * Return a factory for this kind of label
     * (i.e., {@code WordLemmaTag}).
     * A new {@code WordLemmaTagFactory} is created on every call.
     *
     * @return The label factory
     */
    @Override
    public LabelFactory labelFactory() {
        return new WordLemmaTagFactory();
    }

    /*for debugging only*/
    public static void main(String[] args) {
        WordLemmaTag wLT = new WordLemmaTag();
        wLT.setFromString("hunter/NN");

        System.out.println(wLT.word());
        System.out.println(wLT.lemma());
        System.out.println(wLT.tag());

        WordLemmaTag wLT2 = new WordLemmaTag();
        wLT2.setFromString("bought/buy/V");
        System.out.println(wLT2.word());
        System.out.println(wLT2.lemma());
        System.out.println(wLT2.tag());

        WordLemmaTag wLT3 = new WordLemmaTag();
        wLT3.setFromString("life");
        System.out.println(wLT3.word());
        System.out.println(wLT3.lemma());
        System.out.println(wLT3.tag());

    }

    private static final long serialVersionUID = -5993410244163988138L;

}