dkpro.similarity.algorithms.lexical.string.SecondStringComparator_ImplBase.java Source code

Java tutorial

Introduction

Here is the source code for dkpro.similarity.algorithms.lexical.string.SecondStringComparator_ImplBase.java

Source

/*******************************************************************************
 * Copyright 2012
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitt Darmstadt
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl-3.0.txt
 *******************************************************************************/
package dkpro.similarity.algorithms.lexical.string;

import java.util.Collection;

import org.apache.commons.lang.StringUtils;

import com.wcohen.ss.AbstractStringDistance;
import com.wcohen.ss.api.StringWrapper;

import dkpro.similarity.algorithms.api.SimilarityException;
import dkpro.similarity.algorithms.api.TextSimilarityMeasureBase;

public abstract class SecondStringComparator_ImplBase extends TextSimilarityMeasureBase {

    protected AbstractStringDistance secondStringMeasureL1;
    protected AbstractStringDistance secondStringMeasureL2;

    public enum SecondStringTextSimilarityMeasure {
        Jaro, JaroWinkler, Levenshtein, MongeElkan
    }

    @Override
    public double getSimilarity(String s1, String s2) throws SimilarityException {
        // preparing the string saves some cycles through caching, if many comparisons with the same string are done.
        StringWrapper wrappedString1 = secondStringMeasureL1.prepare(s1);
        StringWrapper wrappedString2 = secondStringMeasureL1.prepare(s2);

        return secondStringMeasureL1.score(wrappedString1, wrappedString2);
    }

    @Override
    public double getSimilarity(Collection<String> s1, Collection<String> s2) throws SimilarityException {
        if (s1.size() == 0 || s2.size() == 0) {
            return 0.0;
        }

        String concatenatedString1 = StringUtils.join(s1, " ");
        String concatenatedString2 = StringUtils.join(s2, " ");

        if (concatenatedString1.length() == 0 || concatenatedString2.length() == 0) {
            return 0.0;
        }

        // find tokens (I know that we already know what the tokens are, but the SecondString implementation needs it that way)
        StringWrapper wrappedString1 = secondStringMeasureL2.prepare(concatenatedString1);
        StringWrapper wrappedString2 = secondStringMeasureL2.prepare(concatenatedString2);

        double distance = secondStringMeasureL2.score(wrappedString1, wrappedString2);

        return distance;
    }
}