Java tutorial
/* * Copyright 2014 Hugo m09? Mougard. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package eu.crydee.alignment.aligner.ae; import com.google.common.collect.Maps; import eu.crydee.alignment.aligner.ts.CosineSimilarities; import eu.crydee.alignment.aligner.ts.Sentence; import eu.crydee.alignment.aligner.ts.Token; import java.util.Collection; import java.util.Map; import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CASException; import org.apache.uima.fit.component.JCasAnnotator_ImplBase; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.DoubleArray; /** * * @author Hugo m09? 
Mougard */
public class CosineSimilarityAE extends JCasAnnotator_ImplBase {

    private static final Logger logger
            = LogManager.getLogger(CosineSimilarityAE.class);

    /**
     * Name of the configuration parameter that holds the name of the first
     * ("left") view whose sentences are compared.
     */
    public static final String PARAM_VIEW_LEFT = "P3";
    @ConfigurationParameter(name = PARAM_VIEW_LEFT, mandatory = true)
    private String nameEle;

    /**
     * Name of the configuration parameter that holds the name of the second
     * ("right") view whose sentences are compared.
     */
    public static final String PARAM_VIEW_RIGHT = "P4";
    @ConfigurationParameter(name = PARAM_VIEW_RIGHT, mandatory = true)
    private String nameReg;

    /**
     * Computes the cosine similarity between every sentence of the left view
     * and every sentence of the right view, using the tf-idf values carried
     * by the tokens each sentence covers (one vector component per lemma).
     *
     * <p>Each sentence receives a {@link CosineSimilarities} whose score
     * array has one slot per sentence of the <em>other</em> view: for the
     * left sentence at position {@code e} and the right sentence at position
     * {@code r} (positions follow the order returned by
     * {@code JCasUtil.select}), the same value is written at index {@code r}
     * of the left sentence's scores and at index {@code e} of the right
     * sentence's scores.
     *
     * <p>Sentences of both views always receive a score array, even when the
     * other view contains no sentence (the array is then empty). A pair in
     * which either sentence has a zero-length vector gets a similarity of
     * {@code 0} rather than {@code NaN}.
     *
     * @param jcas the CAS that contains both configured views
     * @throws AnalysisEngineProcessException if one of the configured views
     * cannot be obtained
     */
    @Override
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        JCas jcasEle, jcasReg;
        try {
            jcasEle = jcas.getView(nameEle);
            jcasReg = jcas.getView(nameReg);
        } catch (CASException ex) {
            throw new AnalysisEngineProcessException(ex);
        }

        // Build every sentence vector (and its norm) exactly once instead of
        // once per sentence pair.
        SentenceVector[] vectorsEle = vectorize(jcasEle),
                vectorsReg = vectorize(jcasReg);
        int l = vectorsEle.length, m = vectorsReg.length;

        // Right-view score arrays are created up front so that they exist
        // even when the left view has no sentence.
        CosineSimilarities[] allCsReg = new CosineSimilarities[m];
        for (int r = 0; r < m; ++r) {
            allCsReg[r] = attachScores(jcasReg, vectorsReg[r].sentence, l);
        }

        for (int e = 0; e < l; ++e) {
            SentenceVector vectorEle = vectorsEle[e];
            CosineSimilarities csEle
                    = attachScores(jcasEle, vectorEle.sentence, m);
            for (int r = 0; r < m; ++r) {
                double similarity = cosine(vectorEle, vectorsReg[r]);
                // The measure is symmetric: store it on both sentences.
                csEle.setScores(r, similarity);
                allCsReg[r].setScores(e, similarity);
            }
        }
    }

    /**
     * Builds the lemma vector of every sentence of a view.
     *
     * @param view the view to read sentences and tokens from
     * @return one vector per sentence, in the order returned by
     * {@code JCasUtil.select}
     */
    private SentenceVector[] vectorize(JCas view) {
        Map<Sentence, Collection<Token>> index
                = JCasUtil.indexCovered(view, Sentence.class, Token.class);
        Sentence[] sentences = JCasUtil.select(view, Sentence.class)
                .toArray(new Sentence[0]);
        SentenceVector[] vectors = new SentenceVector[sentences.length];
        for (int i = 0; i < sentences.length; ++i) {
            Collection<Token> tokens = index.get(sentences[i]);
            // NOTE(review): whether the index returns null or an empty
            // collection for a sentence covering no token is not visible
            // here; both cases are treated as an empty vector.
            Map<String, Double> weights = tokens == null
                    ? Maps.<String, Double>newHashMap()
                    : scores(tokens);
            vectors[i] = new SentenceVector(sentences[i], weights);
        }
        return vectors;
    }

    /**
     * Creates a {@link CosineSimilarities} with a score array of the given
     * size in the given view and sets it on the sentence.
     *
     * @param view the view in which the feature structures are created
     * @param sentence the sentence that receives the similarities
     * @param size number of slots of the score array
     * @return the newly created similarities holder
     */
    private static CosineSimilarities attachScores(
            JCas view,
            Sentence sentence,
            int size) {
        CosineSimilarities cs = new CosineSimilarities(view);
        cs.setScores(new DoubleArray(view, size));
        sentence.setCosineSimilarities(cs);
        return cs;
    }

    /**
     * Cosine similarity of two sentence vectors.
     *
     * @param a first vector
     * @param b second vector
     * @return the dot product over the lemmas both vectors share, divided by
     * both norms; {@code 0} when either norm is zero
     */
    private static double cosine(SentenceVector a, SentenceVector b) {
        // Guard against 0/0, which would otherwise store NaN as a score.
        if (a.norm == 0d || b.norm == 0d) {
            return 0d;
        }
        double dot = 0d;
        for (Map.Entry<String, Double> entry : a.weights.entrySet()) {
            Double other = b.weights.get(entry.getKey());
            if (other != null) {
                dot += other * entry.getValue();
            }
        }
        return dot / a.norm / b.norm;
    }

    /**
     * Maps each lemma of the given tokens to a tf-idf value. When several
     * tokens share a lemma, the value of the first one encountered is kept.
     *
     * @param tokens the tokens of one sentence
     * @return lemma to tf-idf value
     */
    private Map<String, Double> scores(Collection<Token> tokens) {
        return tokens.stream().collect(Collectors.toMap(
                Token::getLemma,
                Token::getTfidf,
                (first, second) -> first));
    }

    /**
     * A sentence together with its lemma vector and the Euclidean norm of
     * that vector. The norm is computed from the same de-duplicated weights
     * that the dot product uses, so that both sides of the cosine formula
     * describe the same vector.
     */
    private static final class SentenceVector {

        private final Sentence sentence;
        private final Map<String, Double> weights;
        private final double norm;

        SentenceVector(Sentence sentence, Map<String, Double> weights) {
            this.sentence = sentence;
            this.weights = weights;
            this.norm = Math.sqrt(weights.values().stream()
                    .mapToDouble(w -> w * w)
                    .sum());
        }
    }
}