com.rapidminer.operator.preprocessing.discretization.UserBasedDiscretization.java Source code

Java tutorial

Introduction

Here is the source code for com.rapidminer.operator.preprocessing.discretization.UserBasedDiscretization.java

Source

/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.discretization;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.lang.ArrayUtils;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import com.rapidminer.tools.container.Tupel;

/**
 * This operator discretizes a numerical attribute to either a nominal or an ordinal attribute. The
 * numerical values are mapped to the classes according to the thresholds specified by the user. The
 * user can define the classes by specifying the upper limits of each class. The lower limit of the
 * next class is automatically specified as the upper limit of the previous one. A parameter defines
 * which adjacent class receives values that are equal to one of the given limits. If the upper
 * limit in the last list entry is not equal to Infinity, an additional class which is automatically
 * named is added. If a '?' is given as class value the according numerical values are mapped to
 * unknown values in the resulting attribute.
 *
 * @author Sebastian Land
 */
public class UserBasedDiscretization extends AbstractDiscretizationOperator {

    static {
        registerDiscretizationOperator(UserBasedDiscretization.class);
    }

    /** Key of the double parameter holding the upper limit of a single class. */
    public static final String PARAMETER_UPPER_LIMIT = "upper_limit";

    /** Key of the list parameter that defines the classes together with their upper limits. */
    public static final String PARAMETER_RANGE_NAMES = "classes";

    /** Key of the string parameter holding the name of a single range. */
    private static final String PARAMETER_CLASS_NAME = "class_names";

    /**
     * Version marker for an incompatible behaviour change: only for compatibility levels above
     * this version does {@link #writesIntoExistingData()} unconditionally report {@code false}.
     */
    private static final OperatorVersion VERSION_MAY_WRITE_INTO_DATA = new OperatorVersion(7, 1, 1);

    /**
     * Creates the operator from its description.
     *
     * @param description
     *            the operator description handed on to the super class
     */
    public UserBasedDiscretization(OperatorDescription description) {
        super(description);
    }

    /**
     * Produces the meta data of the discretized attribute: a nominal attribute with the name and
     * role of the given one, whose value set is declared to be a subset of the configured class
     * names.
     *
     * @param emd
     *            the meta data of the example set (not used here)
     * @param amd
     *            the meta data of the attribute that is discretized
     * @return a single-element list containing the new attribute meta data
     * @throws UndefinedParameterError
     *             if the classes list parameter cannot be read
     */
    @Override
    protected Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd)
            throws UndefinedParameterError {
        AttributeMetaData discretized = new AttributeMetaData(amd.getName(), Ontology.NOMINAL, amd.getRole());
        discretized.setValueSet(collectConfiguredClassNames(), SetRelation.SUBSET);
        return Collections.singletonList(discretized);
    }

    /**
     * Builds a {@link DiscretizationModel} in which every numerical attribute of the example set is
     * associated with the same set of (upper limit, class name) pairs taken from the classes list
     * parameter.
     *
     * @param exampleSet
     *            the example set the model is created for
     * @return the configured discretization model
     * @throws OperatorException
     *             if the classes list parameter cannot be read
     */
    @Override
    public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException {
        HashMap<String, SortedSet<Tupel<Double, String>>> rangesByAttribute = new HashMap<String, SortedSet<Tupel<Double, String>>>();
        TreeSet<Tupel<Double, String>> limits = buildThresholdSet();

        for (Attribute candidate : exampleSet.getAttributes()) {
            if (!candidate.isNumerical()) {
                continue;
            }
            // one shared set instance is registered for all numerical attributes
            rangesByAttribute.put(candidate.getName(), limits);
        }

        DiscretizationModel result = new DiscretizationModel(exampleSet);
        result.setRanges(rangesByAttribute);
        return result;
    }

    /**
     * @return the model class created by this operator, {@link DiscretizationModel}
     */
    @Override
    public Class<? extends PreprocessingModel> getPreprocessingModelClass() {
        return DiscretizationModel.class;
    }

    /**
     * Extends the inherited parameter types by the non-expert list parameter
     * {@link #PARAMETER_RANGE_NAMES}. Each list entry consists of a class name and an upper limit;
     * the default list contains the entries "first" (negative infinity) and "last" (positive
     * infinity).
     *
     * @return the inherited parameter types plus the classes list
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();

        List<String[]> defaults = new LinkedList<String[]>();
        defaults.add(new String[] { "first", Double.toString(Double.NEGATIVE_INFINITY) });
        defaults.add(new String[] { "last", Double.toString(Double.POSITIVE_INFINITY) });

        ParameterType nameColumn = new ParameterTypeString(PARAMETER_CLASS_NAME, "The name of this range.");
        ParameterType limitColumn = new ParameterTypeDouble(PARAMETER_UPPER_LIMIT, "The upper limit.",
                Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);

        ParameterType classes = new ParameterTypeList(PARAMETER_RANGE_NAMES,
                "Defines the classes and the upper limits of each class.", nameColumn, limitColumn, defaults);
        classes.setExpert(false);
        parameterTypes.add(classes);

        return parameterTypes;
    }

    /**
     * Reports whether this operator writes into the existing data. For compatibility levels above
     * {@link #VERSION_MAY_WRITE_INTO_DATA} the answer is always {@code false}; otherwise it is
     * {@code true} only if the original output is connected and the super class reports
     * {@code true} as well.
     */
    @Override
    public boolean writesIntoExistingData() {
        boolean newBehaviour = getCompatibilityLevel().isAbove(VERSION_MAY_WRITE_INTO_DATA);
        if (newBehaviour) {
            return false;
        }
        // legacy behaviour: depends on the original output port and on the super class
        return isOriginalOutputConnected() && super.writesIntoExistingData();
    }

    /**
     * @return the resource consumption estimator looked up for this operator class, its input
     *         port and its attribute selector
     */
    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(),
                UserBasedDiscretization.class, attributeSelector);
    }

    /**
     * @return the incompatible version changes of the super class followed by
     *         {@link #VERSION_MAY_WRITE_INTO_DATA}
     */
    @Override
    public OperatorVersion[] getIncompatibleVersionChanges() {
        OperatorVersion[] inherited = super.getIncompatibleVersionChanges();
        OperatorVersion[] own = new OperatorVersion[] { VERSION_MAY_WRITE_INTO_DATA };
        return (OperatorVersion[]) ArrayUtils.addAll(inherited, own);
    }

    /**
     * Collects the first component (the class name) of every entry of the classes list parameter.
     */
    private TreeSet<String> collectConfiguredClassNames() throws UndefinedParameterError {
        List<String[]> entries = getParameterList(PARAMETER_RANGE_NAMES);
        TreeSet<String> names = new TreeSet<String>();
        for (String[] entry : entries) {
            names.add(entry[0]);
        }
        return names;
    }

    /**
     * Converts every entry of the classes list parameter into an (upper limit, class name) pair;
     * the limit is parsed from the second component of the entry.
     */
    private TreeSet<Tupel<Double, String>> buildThresholdSet() throws UndefinedParameterError {
        List<String[]> entries = getParameterList(PARAMETER_RANGE_NAMES);
        TreeSet<Tupel<Double, String>> pairs = new TreeSet<Tupel<Double, String>>();
        for (String[] entry : entries) {
            Double upperLimit = Double.valueOf(entry[1]);
            String className = entry[0];
            pairs.add(new Tupel<Double, String>(upperLimit, className));
        }
        return pairs;
    }
}