edu.stanford.nlp.process.CoreLabelTokenFactory.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.process.CoreLabelTokenFactory.java

Source

package edu.stanford.nlp.process;

import java.io.Serializable;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;

/**
 * Token factory that produces {@link CoreLabel}s, either from token Strings
 * (optionally tagged with their character offsets in the original text),
 * from key/value arrays, as a copy of an existing label, or empty.
 * <p>
 * The String-based {@code makeToken} methods store the token text via
 * {@code setValue} and {@code setWord}, and the original text via
 * {@code setOriginalText}. When the factory was created with
 * {@code addIndices == true} they also record the begin offset and the
 * position just after the end of the token in
 * {@code CharacterOffsetBeginAnnotation} and
 * {@code CharacterOffsetEndAnnotation}.
 * <p>
 * Tokens built by PTBTokenizer with an "invertible" tokenizer additionally
 * get a BeforeAnnotation and, for the last token, an AfterAnnotation; this
 * factory does not set those itself.
 *
 * @author Anna Rafferty
 * @author Sonal Gupta (now implements CoreTokenFactory, you can make tokens using many options)
 */
public class CoreLabelTokenFactory
        implements CoreTokenFactory<CoreLabel>, LexedTokenFactory<CoreLabel>, Serializable {

    private static final long serialVersionUID = 4L;

    /** Whether String-based tokens get character offset annotations. */
    private final boolean addIndices;

    /**
     * Creates a factory whose String-based tokens carry the token text and
     * the begin/end character offset annotations.
     */
    public CoreLabelTokenFactory() {
        this(true);
    }

    /**
     * Creates a factory, letting the caller decide whether the begin/end
     * character offset annotations are written into the labels it makes.
     *
     * @param addIndices if true, begin and end offset annotations are added
     *                   to each String-based token (this is the default)
     */
    public CoreLabelTokenFactory(boolean addIndices) {
        this.addIndices = addIndices;
    }

    /**
     * Builds a CoreLabel for {@code tokenText}, using that same String as
     * the original text. The text is stored as given; no substring is taken.
     *
     * @param tokenText the token text (also used as the original text)
     * @param begin     offset at which the token begins
     * @param length    length of the token; the end offset is {@code begin + length}
     * @return the newly built label
     */
    @Override
    public CoreLabel makeToken(String tokenText, int begin, int length) {
        return makeToken(tokenText, tokenText, begin, length);
    }

    /**
     * Builds a CoreLabel for a token whose original text differs from the
     * token text. Both Strings are stored as given; no substring is taken.
     *
     * @param tokenText    text stored as the label's value and word
     * @param originalText text stored as the label's original text
     * @param begin        offset at which the token begins
     * @param length       length of the token; the end offset is {@code begin + length}
     * @return the newly built label, with offset annotations only if this
     *         factory was created with {@code addIndices == true}
     */
    public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
        final CoreLabel token;
        if (addIndices) {
            // NOTE(review): 5 presumably pre-sizes the label for the entries
            // written below (value, word, original text, begin, end) -- confirm
            // against the CoreLabel(int) constructor.
            token = new CoreLabel(5);
        } else {
            token = new CoreLabel();
        }

        token.setValue(tokenText);
        token.setWord(tokenText);
        token.setOriginalText(originalText);

        if (!addIndices) {
            return token;
        }

        // The end offset is the position just past the last character.
        final int end = begin + length;
        token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
        token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
        return token;
    }

    /**
     * Builds a CoreLabel with the no-argument constructor; nothing is set on it here.
     *
     * @return a freshly constructed label
     */
    @Override
    public CoreLabel makeToken() {
        return new CoreLabel();
    }

    /**
     * Builds a CoreLabel by handing the two arrays straight to the
     * {@code CoreLabel(String[], String[])} constructor.
     *
     * @param keys   keys passed through to the CoreLabel constructor
     * @param values values passed through to the CoreLabel constructor
     * @return the newly built label
     */
    @Override
    public CoreLabel makeToken(String[] keys, String[] values) {
        return new CoreLabel(keys, values);
    }

    /**
     * Builds a new CoreLabel from an existing one using CoreLabel's
     * copy constructor.
     *
     * @param labelToBeCopied the label to copy
     * @return a new label constructed from {@code labelToBeCopied}
     */
    @Override
    public CoreLabel makeToken(CoreLabel labelToBeCopied) {
        return new CoreLabel(labelToBeCopied);
    }
}