delfos.rs.contentbased.vsm.booleanvsm.BooleanFeaturesTransformation.java Source code

Java tutorial

Introduction

Here is the source code for delfos.rs.contentbased.vsm.booleanvsm.BooleanFeaturesTransformation.java

Source

/*
 * Copyright (C) 2016 jcastro
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package delfos.rs.contentbased.vsm.booleanvsm;

import delfos.dataset.basic.features.Feature;
import delfos.dataset.basic.features.FeatureType;
import delfos.dataset.basic.item.ContentDataset;
import delfos.rs.contentbased.vsm.booleanvsm.profile.BooleanUserProfile;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.math4.util.Pair;

/**
 * Objeto que almacena una transformacin de caractersticas y valores a un vector de ocurrencias.
 *
 * @author jcastro-inf ( https://github.com/jcastro-inf )
 *
 * @version 10-Octubre-2013
 */
public class BooleanFeaturesTransformation implements Serializable, Iterable<FeatureValue> {

    private static final long serialVersionUID = 1L;
    /**
     * Guarda la correspondencia entre el valor de cada caracterstica y su posicin en el perfil booleano.
     */
    private final Map<Feature, Map<Object, Long>> featureValuesIndexes = new TreeMap<>();
    private final int numFeatures;

    public BooleanFeaturesTransformation(ContentDataset contentDataset) {

        int index = 0;
        for (Feature f : contentDataset.getFeatures()) {
            for (Object value : contentDataset.getAllFeatureValues(f)) {
                addFeatureToIndex(f, value, index++);
            }
        }
        numFeatures = index;
    }

    private long addFeatureToIndex(Feature feature, Object value, long index) {
        if (featureValuesIndexes.containsKey(feature)) {
            if (featureValuesIndexes.get(feature).containsKey(value)) {
                //Todo correcto, ya estaba en su sitio.
            } else {
                featureValuesIndexes.get(feature).put(value, index++);
            }
        } else {
            featureValuesIndexes.put(feature, new TreeMap<>());
            featureValuesIndexes.get(feature).put(value, index++);
        }
        return featureValuesIndexes.get(feature).get(value);
    }

    /**
     * Crea un vector para representar el perfil, que slo permite valores dentro de los definidos en esta
     * transformacin booleana.
     *
     * @return
     */
    public SparseVector<Long> newProfile() {
        return SparseVector.create(getDomain());
    }

    public Collection<Long> getDomain() {
        List<Long> ret = new ArrayList<>(numFeatures);
        for (long i = 0; i < numFeatures; i++) {
            ret.add(i);
        }
        return ret;
    }

    /**
     * Devuelve todos los valores que toma la caracterstica indicada.
     *
     * @param feature Caracterstica para la que se buscan sus posibles valores.
     * @return Valores que toman los productos. Tienen la peculiaridad de que algun tem tiene para la caracterstica
     * indicada el valor devuelto.
     */
    public Iterable<Object> getAllFeatureValues(Feature feature) {
        ArrayList<Object> ret = new ArrayList<>(featureValuesIndexes.get(feature).keySet());
        return ret;
    }

    /**
     * Nmero de pares (caracterstica, valor) distintos que se dan en el dataset de contenido.
     *
     * @return Nmero de pares distintos.
     */
    public int sizeOfAllFeatureValues() {
        int size = 0;
        for (Feature feature : featureValuesIndexes.keySet()) {
            size += featureValuesIndexes.get(feature).size();
        }
        return size;
    }

    /**
     * Transforma el vector disperso en un mapa con los valores de las caractersticas.
     *
     * @param sparseVector Vector de valores dados en el dominio de esta transformacin booleana.
     * @return
     */
    public Map<Feature, Map<Object, Double>> getFeatureValueMap(SparseVector<Long> sparseVector) {
        Map<Feature, Map<Object, Double>> ret = new TreeMap<>();

        for (Pair<Long, Double> entry : sparseVector.entrySet()) {
            long idFeatureValue = entry.getKey();
            double value = entry.getValue();

            FeatureValue featureValuePair = getFeatureValue(idFeatureValue);
            Feature feature = featureValuePair.feature;
            Object featureValue = featureValuePair.value;

            if (!ret.containsKey(feature)) {
                ret.put(feature, new TreeMap<>());
            }

            ret.get(feature).put(featureValue, value);

        }

        return ret;
    }

    protected FeatureValue getFeatureValue(long index) {
        for (Feature f : featureValuesIndexes.keySet()) {
            if (featureValuesIndexes.get(f).containsValue(index)) {
                for (Map.Entry<Object, Long> entry : featureValuesIndexes.get(f).entrySet()) {
                    if (entry.getValue().equals(index)) {
                        return new FeatureValue(f, entry.getKey());
                    }
                }
            }
        }

        throw new IndexOutOfBoundsException("The index " + index + " is not defined");
    }

    public long getFeatureIndex(Feature f, Object value) {
        if (f.getType() == FeatureType.Unary) {
            value = "1";
        }

        if (featureValuesIndexes.containsKey(f)) {
            if (featureValuesIndexes.get(f).containsKey(value)) {
                return featureValuesIndexes.get(f).get(value);
            } else {
                throw new IllegalArgumentException("The feature '" + f + "' value '" + value + "' is not defined.");
            }
        } else {
            throw new IllegalArgumentException("The feature '" + f + "' is not defined");
        }

    }

    public List<Double> getDoubleVector(SparseVector<Long> sparseVector) {

        List<Double> ret = new ArrayList<>(numFeatures);
        for (int i = 0; i < numFeatures; i++) {
            ret.add(0.0);
        }
        for (Pair<Long, Double> entry : sparseVector.entrySet()) {
            ret.set(entry.getKey().intValue(), (double) entry.getValue());
        }
        return ret;
    }

    public List<Double> getDoubleValuesVector(BooleanUserProfile booleanUserProfile) {
        List<Double> ret = new ArrayList<>(numFeatures);
        for (int i = 0; i < numFeatures; i++) {
            ret.add(0.0);
        }

        for (Feature feature : booleanUserProfile.getFeatures()) {
            for (Object featureValue : booleanUserProfile.getValuedFeatureValues(feature)) {
                long idFeatureValue = getFeatureIndex(feature, featureValue);
                double featureValueValue = booleanUserProfile.getFeatureValueValue(feature, featureValue);

                ret.set((int) idFeatureValue, (double) featureValueValue);
            }
        }

        return ret;
    }

    public SparseVector<Long> getDoubleValuesSparseVector(BooleanUserProfile booleanUserProfile) {
        SparseVector<Long> userProfile = newProfile();

        for (Feature feature : booleanUserProfile.getFeatures()) {
            for (Object featureValue : booleanUserProfile.getValuedFeatureValues(feature)) {
                long idFeatureValue = getFeatureIndex(feature, featureValue);
                double featureValueValue = booleanUserProfile.getFeatureValueValue(feature, featureValue);

                userProfile.set(idFeatureValue, featureValueValue);
            }
        }

        return userProfile;
    }

    public SparseVector<Long> getDoubleWeightsSparseVector(BooleanUserProfile booleanUserProfile) {
        SparseVector<Long> userProfile = newProfile();

        for (Feature feature : booleanUserProfile.getFeatures()) {
            for (Object featureValue : booleanUserProfile.getValuedFeatureValues(feature)) {
                long idFeatureValue = getFeatureIndex(feature, featureValue);
                double featureValueWeight = booleanUserProfile.getFeatureValueWeight(feature, featureValue);

                userProfile.set(idFeatureValue, featureValueWeight);
            }
        }

        return userProfile;
    }

    public List<Double> getDoubleWeightsVector(BooleanUserProfile booleanUserProfile) {
        List<Double> ret = new ArrayList<>(numFeatures);
        for (int i = 0; i < numFeatures; i++) {
            ret.add(0.0);
        }

        for (Feature feature : booleanUserProfile.getFeatures()) {
            for (Object featureValue : booleanUserProfile.getValuedFeatureValues(feature)) {
                long idFeatureValue = getFeatureIndex(feature, featureValue);
                double featureValueValue = booleanUserProfile.getFeatureValueWeight(feature, featureValue);

                ret.set((int) idFeatureValue, (double) featureValueValue);
            }
        }

        return ret;
    }

    @Override
    public Iterator<FeatureValue> iterator() {
        Collection<FeatureValue> list = new ArrayList<>();

        for (Feature feature : featureValuesIndexes.keySet()) {
            for (Object value : featureValuesIndexes.get(feature).keySet()) {
                list.add(new FeatureValue(feature, value));
            }
        }
        list = Collections.unmodifiableCollection(list);

        return list.iterator();
    }
}