eu.crydee.alignment.aligner.ae.CosineSimilarityAE.java Source code

Java tutorial

Introduction

Here is the source code for eu.crydee.alignment.aligner.ae.CosineSimilarityAE.java

Source

/*
 * Copyright 2014 Hugo m09? Mougard.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.crydee.alignment.aligner.ae;

import com.google.common.collect.Maps;
import eu.crydee.alignment.aligner.ts.CosineSimilarities;
import eu.crydee.alignment.aligner.ts.Sentence;
import eu.crydee.alignment.aligner.ts.Token;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.DoubleArray;

/**
 * Analysis engine that scores every sentence of one CAS view against every
 * sentence of another CAS view using the cosine similarity of their
 * lemma/tf-idf vectors.
 *
 * <p>
 * For each sentence a {@link CosineSimilarities} annotation is created and
 * attached to it. Its score array has one slot per sentence of the
 * <em>other</em> view, indexed in the order returned by
 * {@code JCasUtil.select}. The score of the pair (left {@code e}, right
 * {@code r}) is stored both at index {@code r} of the left sentence and at
 * index {@code e} of the right sentence.
 *
 * @author Hugo m09? Mougard
 */
public class CosineSimilarityAE extends JCasAnnotator_ImplBase {

    private static final Logger logger = LogManager.getLogger(CosineSimilarityAE.class);

    /**
     * Name of the configuration parameter holding the name of the first
     * ("left") view to compare.
     */
    public static final String PARAM_VIEW_LEFT = "P3";
    @ConfigurationParameter(name = PARAM_VIEW_LEFT, mandatory = true)
    private String nameEle;

    /**
     * Name of the configuration parameter holding the name of the second
     * ("right") view to compare.
     */
    public static final String PARAM_VIEW_RIGHT = "P4";
    @ConfigurationParameter(name = PARAM_VIEW_RIGHT, mandatory = true)
    private String nameReg;

    /**
     * Computes the cosine similarity of every (left sentence, right sentence)
     * pair and stores it on both sentences.
     *
     * <p>
     * A pair in which either sentence has a zero norm gets a similarity of
     * {@code 0} instead of the {@code NaN} a plain division would produce.
     *
     * @param jcas the CAS containing the two configured views
     * @throws AnalysisEngineProcessException if one of the configured views
     * cannot be retrieved
     */
    @Override
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        JCas jcasEle, jcasReg;
        try {
            jcasEle = jcas.getView(nameEle);
            jcasReg = jcas.getView(nameReg);
        } catch (CASException ex) {
            throw new AnalysisEngineProcessException(ex);
        }
        Map<Sentence, Collection<Token>> indexEle = JCasUtil.indexCovered(jcasEle, Sentence.class, Token.class),
                indexReg = JCasUtil.indexCovered(jcasReg, Sentence.class, Token.class);
        Sentence[] sentsEle = JCasUtil.select(jcasEle, Sentence.class).toArray(new Sentence[0]),
                sentsReg = JCasUtil.select(jcasReg, Sentence.class).toArray(new Sentence[0]);
        int l = sentsEle.length, m = sentsReg.length;

        // Everything that depends only on the right-hand sentence is computed
        // once here rather than once per (left, right) pair.
        List<Map<String, Double>> scoresReg = new ArrayList<>(m);
        double[] normsReg = new double[m];
        CosineSimilarities[] csRegs = new CosineSimilarities[m];
        for (int r = 0; r < m; ++r) {
            Sentence sentReg = sentsReg[r];
            Collection<Token> tokensReg = indexReg.get(sentReg);
            scoresReg.add(scores(tokensReg));
            normsReg[r] = norm(tokensReg);
            // Created unconditionally so that right-hand sentences carry a
            // (possibly empty) score array even when the left view has no
            // sentence, mirroring what is done for left-hand sentences.
            CosineSimilarities csReg = new CosineSimilarities(jcasReg);
            csReg.setScores(new DoubleArray(jcasReg, l));
            sentReg.setCosineSimilarities(csReg);
            csRegs[r] = csReg;
        }

        for (int e = 0; e < l; ++e) {
            Sentence sentEle = sentsEle[e];
            CosineSimilarities csEle = new CosineSimilarities(jcasEle);
            csEle.setScores(new DoubleArray(jcasEle, m));
            sentEle.setCosineSimilarities(csEle);
            Collection<Token> tokensEle = indexEle.get(sentEle);
            Map<String, Double> scoresEle = scores(tokensEle);
            double normEle = norm(tokensEle);
            for (int r = 0; r < m; ++r) {
                double similarity = cosine(scoresEle, normEle, scoresReg.get(r), normsReg[r]);
                csEle.setScores(r, similarity);
                csRegs[r].setScores(e, similarity);
            }
        }
    }

    /**
     * Cosine similarity of two sparse lemma vectors whose norms are already
     * known.
     *
     * @param left lemma to weight mapping of the first sentence
     * @param normLeft norm associated with {@code left}
     * @param right lemma to weight mapping of the second sentence
     * @param normRight norm associated with {@code right}
     * @return the dot product of the shared lemmas divided by both norms, or
     * {@code 0} when either norm is zero
     */
    private static double cosine(
            Map<String, Double> left,
            double normLeft,
            Map<String, Double> right,
            double normRight) {
        // A zero norm would turn the division below into 0/0 = NaN.
        if (normLeft == 0d || normRight == 0d) {
            return 0d;
        }
        double dot = 0d;
        for (Map.Entry<String, Double> entry : left.entrySet()) {
            Double other = right.get(entry.getKey());
            if (other != null) {
                dot += other * entry.getValue();
            }
        }
        return dot / normLeft / normRight;
    }

    /**
     * Maps each lemma of the given tokens to its tf-idf weight. When several
     * tokens share a lemma, the weight of the first one encountered is kept.
     *
     * @param tokens the tokens of one sentence
     * @return the lemma to tf-idf mapping
     */
    private Map<String, Double> scores(Collection<Token> tokens) {
        return tokens.stream().collect(Collectors.toMap(Token::getLemma, Token::getTfidf, (s, t) -> s));
    }

    /**
     * Square root of the sum of the squared tf-idf weights of the given
     * tokens.
     *
     * <p>
     * NOTE(review): the sum runs over every token, whereas
     * {@link #scores(Collection)} keeps a single weight per lemma, so a
     * sentence with repeated lemmas has a norm larger than that of the vector
     * used for the dot product. Kept as is to preserve existing scores;
     * confirm whether this is intended.
     *
     * @param tokens the tokens of one sentence
     * @return the norm, {@code 0} when there are no tokens
     */
    private static double norm(Collection<Token> tokens) {
        return Math.sqrt(tokens.stream().mapToDouble(t -> Math.pow(t.getTfidf(), 2)).sum());
    }
}