pl.edu.icm.cermine.bibref.parsing.features.IsDashBetweenWordsFeature.java Source code

Java tutorial

Introduction

Here is the source code for pl.edu.icm.cermine.bibref.parsing.features.IsDashBetweenWordsFeature.java

Source

/**
 * This file is part of CERMINE project.
 * Copyright (c) 2011-2013 ICM-UW
 *
 * CERMINE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CERMINE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with CERMINE. If not, see <http://www.gnu.org/licenses/>.
 */

package pl.edu.icm.cermine.bibref.parsing.features;

import org.apache.commons.lang.ArrayUtils;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationToken;
import pl.edu.icm.cermine.tools.classification.features.FeatureCalculator;

/**
 *
 * @author Dominika Tkaczyk (dtkaczyk@icm.edu.pl)
 */
public class IsDashBetweenWordsFeature extends FeatureCalculator<CitationToken, Citation> {

    private static final char[] DASH_CHARS = { '-', '\u002D', '\u2010', '\u2011', '\u2012', '\u2013', '\u2014',
            '\u2015', '\u207B', '\u208B', '\u2212' };

    @Override
    public double calculateFeatureValue(CitationToken object, Citation context) {
        String text = object.getText();
        if (text.length() != 1 || object.getStartIndex() <= 0
                || object.getEndIndex() >= context.getText().length()) {
            return 0;
        }
        if (!ArrayUtils.contains(DASH_CHARS, text.charAt(0))) {
            return 0;
        }
        return (Character.isLetter(context.getText().charAt(object.getStartIndex() - 1))
                && Character.isLetter(context.getText().charAt(object.getEndIndex()))) ? 1 : 0;
    }

}