org.t3as.ner.Phrase.java Source code

Java tutorial

Introduction

Here is the source code for org.t3as.ner.Phrase.java

Source

/*
 * #%L
 * NICTA t3as Named-Entity Recognition library
 * %%
 * Copyright (C) 2010 - 2014 NICTA
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */
package org.t3as.ner;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.t3as.ner.EntityClass.UNKNOWN;

/**
 * A phrase: an ordered run of one or more {@link Token}s, together with its position
 * information, its per-class scores and the {@link EntityClass} currently assigned to it.
 */
public class Phrase {
    // TODO: getters and setters for these
    /** The tokens making up the phrase; an immutable copy of the list given to the constructor. */
    public final List<Token> phrase;
    /** The entity class currently assigned to the phrase; overwritten by {@link #classify()}. */
    public EntityClass phraseType;
    /** The start position of the phrase in a sentence. */
    public final int phrasePosition;
    /** The length of the phrase, as supplied to the constructor. */
    public final int phraseLength;
    /** The start position of the phrase stub in a sentence. */
    public final int phraseStubPosition;
    /** The length of the phrase stub; the constructor sets it to the number of tokens in {@link #phrase}. */
    public final int phraseStubLength;
    /** Score recorded for each entity class, kept in insertion order. */
    public Map<EntityClass, Double> score = new LinkedHashMap<>();
    /** Attached word map; starts out empty. */
    public Map<String, String> attachedWordMap;
    /** True if the phrase is a date; false if not. */
    public boolean isDate;

    /**
     * Creates a phrase from its tokens and position information.
     *
     * @param tokens     the tokens of the phrase; copied into an immutable list
     * @param _phrasePos the start position of the phrase in a sentence
     * @param _phraseLen the length of the phrase
     * @param _stubPos   the start position of the phrase stub in a sentence
     * @param type       the initial entity class of the phrase
     */
    @JsonCreator
    public Phrase(@JsonProperty("phrase") final List<Token> tokens,
            @JsonProperty("phrasePosition") final int _phrasePos,
            @JsonProperty("phraseLength") final int _phraseLen,
            @JsonProperty("phraseStubPosition") final int _stubPos,
            @JsonProperty("phraseType") final EntityClass type) {
        this.phrase = ImmutableList.copyOf(tokens);
        this.phraseType = type;
        this.phrasePosition = _phrasePos;
        this.phraseLength = _phraseLen;
        this.phraseStubPosition = _stubPos;
        // The stub length is derived from the token count, not from _phraseLen.
        this.phraseStubLength = this.phrase.size();
        this.attachedWordMap = new HashMap<>();
    }

    /**
     * Returns the token texts joined by single spaces, with leading and trailing
     * whitespace trimmed from the result.
     */
    public String phraseString() {
        final StringBuilder joined = new StringBuilder();
        boolean needSeparator = false;
        for (final Token token : phrase) {
            if (needSeparator) {
                joined.append(" ");
            }
            joined.append(token.text);
            needSeparator = true;
        }
        return joined.toString().trim();
    }

    /**
     * Tests whether this phrase is a sub phrase of the input phrase: that is, whether the
     * token texts of this phrase occur contiguously and in order somewhere in the tokens of
     * {@code other}, ignoring case. An empty phrase is never a sub phrase of anything.
     *
     * @param other the phrase to search in
     * @return true if this phrase occurs inside {@code other}; false otherwise
     */
    public boolean isSubPhraseOf(final Phrase other) {
        if (phrase.isEmpty()) {
            return false;
        }

        // Last index in other.phrase at which this phrase could still fit; negative if it cannot fit.
        final int lastStart = other.phrase.size() - phrase.size();
        for (int start = 0; start <= lastStart; start++) {
            if (matchesAt(other, start)) {
                return true;
            }
        }
        return false;
    }

    /** Checks whether every token text of this phrase equals, ignoring case, the token of {@code other} at {@code offset} onwards. */
    private boolean matchesAt(final Phrase other, final int offset) {
        for (int k = 0; k < phrase.size(); k++) {
            if (!phrase.get(k).text.equalsIgnoreCase(other.phrase.get(offset + k).text)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Classifies the phrase from its scores. The entity class with the highest score becomes
     * the phrase type; if that highest score is shared by more than one class the phrase type
     * becomes {@code UNKNOWN}. When no scores have been recorded the phrase type is set to
     * {@code null}.
     */
    public void classify() {
        EntityClass best = null;
        double bestScore = 0;
        boolean tied = false;
        for (final Map.Entry<EntityClass, Double> entry : score.entrySet()) {
            if (best == null) {
                // Nothing selected yet: take this entry as the starting point.
                best = entry.getKey();
                bestScore = entry.getValue();
                continue;
            }
            final double candidate = entry.getValue();
            final int order = Double.compare(candidate, bestScore);
            if (order > 0) {
                // A strictly better score replaces the leader and clears any earlier tie.
                best = entry.getKey();
                bestScore = candidate;
                tied = false;
            } else if (order == 0) {
                tied = true;
            }
        }

        if (tied) {
            this.phraseType = UNKNOWN;
        } else {
            this.phraseType = best;
        }
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("Phrase{");
        text.append("phrase=").append(phrase);
        text.append(", phraseType=").append(phraseType);
        text.append(", phrasePosition=").append(phrasePosition);
        text.append(", phraseLength=").append(phraseLength);
        text.append(", phraseStubPosition=").append(phraseStubPosition);
        text.append(", phraseStubLength=").append(phraseStubLength);
        text.append(", score=").append(score.toString());
        text.append(", attachedWordMap=").append(attachedWordMap);
        text.append(", isDate=").append(isDate);
        text.append('}');
        return text.toString();
    }
}