Java tutorial
/** * Copyright (C) 2001-2016 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing; import java.util.Collection; import java.util.List; import org.apache.commons.lang.ArrayUtils; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.OperatorVersion; import com.rapidminer.operator.annotation.ResourceConsumptionEstimator; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.SetRelation; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeAttribute; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.OperatorResourceConsumptionHandler; import com.rapidminer.tools.RandomGenerator; import com.rapidminer.tools.math.container.Range; /** * This operator adds random attributes and white noise to the data. New random attributes are * simply filled with random data which is not correlated to the label at all. Additionally, this * operator might add noise to the label attribute or to the regular attributes. In case of a * numerical label the given <code>label_noise</code> is the percentage of the label range which * defines the standard deviation of normal distributed noise which is added to the label attribute. * For nominal labels the parameter <code>label_noise</code> defines the probability to randomly * change the nominal label value. In case of adding noise to regular attributes the parameter * <code>default_attribute_noise</code> simply defines the standard deviation of normal distributed * noise without using the attribute value range. Using the parameter list it is possible to set * different noise levels for different attributes. However, it is not possible to add noise to * nominal attributes. * * @author Ingo Mierswa */ public class NoiseOperator extends PreprocessingOperator { /** The parameter name for "Adds this number of random attributes." */ public static final String PARAMETER_RANDOM_ATTRIBUTES = "random_attributes"; /** * The parameter name for "Add this percentage of a numerical label range as a normal * distributed noise or probability for a nominal label change." */ public static final String PARAMETER_LABEL_NOISE = "label_noise"; /** The parameter name for "The standard deviation of the default attribute noise." */ public static final String PARAMETER_DEFAULT_ATTRIBUTE_NOISE = "default_attribute_noise"; /** The parameter name for "List of noises for each attributes." */ public static final String PARAMETER_NOISE = "noise"; /** The parameter name for "Offset added to the values of each random attribute" */ public static final String PARAMETER_OFFSET = "offset"; /** * The parameter name for "Linear factor multiplicated with the values of each random * attribute" */ public static final String PARAMETER_LINEAR_FACTOR = "linear_factor"; /** * Incompatible version, old version writes into the exampleset, if original output port is not * connected. */ private static final OperatorVersion VERSION_MAY_WRITE_INTO_DATA = new OperatorVersion(7, 1, 1); public NoiseOperator(OperatorDescription description) { super(description); } @Override public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException { exampleSet.recalculateAllAttributeStatistics(); String[] attributeNames = new String[getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES)]; for (int i = 0; i < getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES); i++) { attributeNames[i] = AttributeFactory.createName("random"); } return new NoiseModel(exampleSet, RandomGenerator.getRandomGenerator(this), getParameterList(PARAMETER_NOISE), getParameterAsDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE), getParameterAsDouble(PARAMETER_LABEL_NOISE), getParameterAsDouble(PARAMETER_OFFSET), getParameterAsDouble(PARAMETER_LINEAR_FACTOR), attributeNames); } @Override protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData) throws UndefinedParameterError { AttributeMetaData label = exampleSetMetaData.getLabelMetaData(); if (label != null) { if (label.isNumerical() && getParameterAsDouble(PARAMETER_LABEL_NOISE) > 0) { label.setValueSetRelation(SetRelation.SUPERSET); } } double defaultNoise = getParameterAsDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE); if (defaultNoise > 0) { for (AttributeMetaData amd : exampleSetMetaData.getAllAttributes()) { if (!amd.isSpecial()) { if (amd.isNumerical()) { amd.setValueSetRelation(SetRelation.SUPERSET); } } } } int numberOfRandomAttributes = getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES); for (int i = 0; i < numberOfRandomAttributes; i++) { AttributeMetaData amd = new AttributeMetaData("random" + ((i == 0) ? "" : i + ""), Ontology.REAL); amd.setValueRange(new Range(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), SetRelation.SUBSET); exampleSetMetaData.addAttribute(amd); } return exampleSetMetaData; } /** * This method isn't used anymore, since the calling super function is overridden. */ @Override protected Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd) { return null; } @Override public boolean isSupportingAttributeRoles() { return true; } /** This operator does not support view creation proper. Hence hide the parameter */ @Override public boolean isSupportingView() { return false; } @Override public Class<? extends PreprocessingModel> getPreprocessingModelClass() { return NoiseModel.class; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeInt(PARAMETER_RANDOM_ATTRIBUTES, "Adds this number of random attributes.", 0, Integer.MAX_VALUE, 0); type.setExpert(false); types.add(type); type = new ParameterTypeDouble(PARAMETER_LABEL_NOISE, "Add this percentage of a numerical label range as a normal distributed noise or probability for a nominal label change.", 0.0d, Double.POSITIVE_INFINITY, 0.05d); type.setExpert(false); types.add(type); types.add(new ParameterTypeDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE, "The standard deviation of the default attribute noise.", 0.0d, Double.POSITIVE_INFINITY, 0.0d)); types.add(new ParameterTypeList(PARAMETER_NOISE, "List of noises for each attributes.", new ParameterTypeAttribute("attribute", "To this attribute noise is added.", getExampleSetInputPort()), new ParameterTypeDouble(PARAMETER_NOISE, "The strength of gaussian noise, which is added to this attribute.", 0.0d, Double.POSITIVE_INFINITY, 0.05d))); type = new ParameterTypeDouble(PARAMETER_OFFSET, "Offset added to the values of each random attribute", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d); types.add(type); type = new ParameterTypeDouble(PARAMETER_LINEAR_FACTOR, "Linear factor multiplicated with the values of each random attribute", 0.0d, Double.POSITIVE_INFINITY, 1.0d); types.add(type); types.addAll(RandomGenerator.getRandomGeneratorParameters(this)); return types; } @Override protected int[] getFilterValueTypes() { return new int[] { Ontology.ATTRIBUTE_VALUE }; } @Override public boolean writesIntoExistingData() { if (getCompatibilityLevel().isAbove(VERSION_MAY_WRITE_INTO_DATA)) { return super.writesIntoExistingData(); } else { // old version: true only if original output port is connected return isOriginalOutputConnected() && super.writesIntoExistingData(); } } @Override public ResourceConsumptionEstimator getResourceConsumptionEstimator() { return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), NoiseOperator.class, attributeSelector); } @Override public OperatorVersion[] getIncompatibleVersionChanges() { return (OperatorVersion[]) ArrayUtils.addAll(super.getIncompatibleVersionChanges(), new OperatorVersion[] { VERSION_MAY_WRITE_INTO_DATA }); } }