Java tutorial
/* * Copyright 2016 Simone Filice and Giuseppe Castellucci and Danilo Croce and Roberto Basili * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package it.uniroma2.sag.kelp.linearization.nystrom; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import it.uniroma2.sag.kelp.data.example.Example; import it.uniroma2.sag.kelp.data.example.SimpleExample; import it.uniroma2.sag.kelp.data.representation.Representation; import it.uniroma2.sag.kelp.data.representation.vector.DenseVector; import it.uniroma2.sag.kelp.utils.FileUtils; /** * This class implements the Ensemble Nystrom Method to approximate the implicit * space underlying a Kernel Function, thus producing a low-dimensional dense * representation. <br> * Several projection functions can be defined according to the Nystrom Method * and they can be use together to improve approximation quality. <br> * * More details can be found in the following paper. If you use this class, * <b>please cite</b>: <br> * <li>Danilo Croce and Roberto Basili. Large-scale Kernel-based Language * Learning through the Ensemble Nystrom methods. In Proceedings of ECIR 2016. * Padova, Italy, 2016 <br> * * @author Danilo Croce */ public class NystromMethodEnsemble extends ArrayList<NystromMethod> { /** * */ private static final long serialVersionUID = -7379502720881996512L; /** * Load an Ensemble of Nystrom projectors saved on file. * * @param inputFilePath * The input file path * @return * @throws FileNotFoundException * @throws IOException */ public static NystromMethodEnsemble load(String inputFilePath) throws FileNotFoundException, IOException { ObjectMapper mapper = new ObjectMapper(); InputStream inputStream = FileUtils.createInputStream(inputFilePath); return mapper.readValue(inputStream, NystromMethodEnsemble.class); } /** * Save an Ensemble of Nystrom projectors on file. * * @param outputFilePath * The output file name * @throws FileNotFoundException * @throws IOException */ public void save(String outputFilePath) throws FileNotFoundException, IOException { ObjectMapper mapper = new ObjectMapper(); mapper.enable(SerializationFeature.INDENT_OUTPUT); OutputStream outputStream = FileUtils.createOutputStream(outputFilePath); mapper.writeValue(outputStream, this); } /** * Given an example, this method produces a new <code>Example * <code> containing a single representation, i.e. a dense vector that is * the concatenation of the vectors obtained by each projection * function used in the Ensemble. The <code>label</code>s are copied from * the input example. * * @param example * the input example * @param newRepresentationName * the identifier of the new dense vector * @return a new <code>Example <code> containing a single representation, * i.e. a dense vector that is the concatenation of the vectors * obtained by each projection function used in the Ensemble * @throws InstantiationException */ public Example linearizeByEnsembleAndJuxtaposition(Example example, String newRepresentationName) throws InstantiationException { if (size() == 1) { return get(0).getLinearizedExample(example, newRepresentationName); } ArrayList<Float> weights = new ArrayList<Float>(); for (int i = 0; i < size(); i++) { weights.add(1f); } DenseVector denseVector = getDenseVectorByEnsembleAndJuxtaposition(example, weights); HashMap<String, Representation> representations = new HashMap<String, Representation>(); representations.put(newRepresentationName, denseVector); return new SimpleExample(example.getLabels(), representations); } /** * Given an example, this method produces a * <code>DenseVector</code> that is the concatenation of the vectors * obtained by each projection functions used in the Ensemble. * * @param example * the input example * @return the concatenation of the vectors obtained by each projection * functions used in the Ensemble. * @throws InstantiationException */ public DenseVector getDenseVectorByEnsembleAndJuxtaposition(Example example) throws InstantiationException { List<Float> weights = new ArrayList<Float>(); float weight = 1f / (float) size(); for (int i = 0; i < size(); i++) { weights.add(weight); } return getDenseVectorByEnsembleAndJuxtaposition(example, weights); } /** * Given an example, this method produces a * <code>DenseVector</code> that is the concatenation of the vectors * obtained by each projection functions used in the Ensemble. Each vector * used in the concatenation is multiplied by a corresponding weight * * @param example * The input example * @param weights * the weights applied to each vector before the concatenation * @return * @throws InstantiationException */ public DenseVector getDenseVectorByEnsembleAndJuxtaposition(Example example, List<Float> weights) throws InstantiationException { int m = size(); int newDimensionality = 0; double[][] ensembleBuffer = new double[m][]; for (int i = 0; i < size(); i++) { ensembleBuffer[i] = get(i).calculateVector(example); newDimensionality += ensembleBuffer[i].length; } double[] newVector = new double[newDimensionality]; int index = 0; for (int i = 0; i < size(); i++) { float weight = weights.get(i); for (int j = 0; j < ensembleBuffer[i].length; j++) { newVector[index] = weight * ensembleBuffer[i][j]; index++; } } return new DenseVector(newVector); } /** * @return The ranks of the spaces (a rank for each projection function) * representing the linearized examples */ public float[] getRanks() { float[] ranks = new float[size()]; for (int i = 0; i < size(); i++) { ranks[i] = get(i).getRank(); } return ranks; } }