Java examples for Machine Learning AI:datumbox
DataModeling example via datumbox
/**//from w w w .j av a 2 s. c om * Copyright (C) 2013-2015 Vasilis Vryniotis <bbriniotis@datumbox.com> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import com.datumbox.applications.datamodeling.Modeler; import com.datumbox.common.dataobjects.Dataset; import com.datumbox.common.dataobjects.Record; import com.datumbox.common.dataobjects.TypeInference; import com.datumbox.common.persistentstorage.ConfigurationFactory; import com.datumbox.common.persistentstorage.interfaces.DatabaseConfiguration; import com.datumbox.common.utilities.PHPfunctions; import com.datumbox.common.utilities.RandomGenerator; import com.datumbox.framework.machinelearning.common.bases.mlmodels.BaseMLmodel; import com.datumbox.framework.machinelearning.datatransformation.DummyXYMinMaxNormalizer; import com.datumbox.framework.machinelearning.regression.NLMS; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.Reader; import java.net.URISyntaxException; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; /** * DataModeling example. * * @author Vasilis Vryniotis <bbriniotis@datumbox.com> */ public class DataModeling { /** * Example of how to use the Modeler class. * * @param args the command line arguments * @throws FileNotFoundException * @throws URISyntaxException */ public static void main(String[] args) throws FileNotFoundException, URISyntaxException { /** * There are two configuration files in the resources folder: * * - datumbox.config.properties: It contains the configuration for the storage engines (required) * - logback.xml: It contains the configuration file for the logger (optional) */ //Initialization //-------------- RandomGenerator.setGlobalSeed(42L); //optionally set a specific seed for all Random objects DatabaseConfiguration dbConf = ConfigurationFactory.INMEMORY .getConfiguration(); //in-memory maps //DatabaseConfiguration dbConf = ConfigurationFactory.MAPDB.getConfiguration(); //mapdb maps //Reading Data //------------ Reader fileReader = new FileReader(Paths.get( DataModeling.class .getClassLoader() .getResource( "datasets/labor-statistics/longley.csv") .toURI()).toFile()); Map<String, TypeInference.DataType> headerDataTypes = new HashMap<>(); headerDataTypes.put("Employed", TypeInference.DataType.NUMERICAL); headerDataTypes.put("GNP.deflator", TypeInference.DataType.NUMERICAL); headerDataTypes.put("GNP", TypeInference.DataType.NUMERICAL); headerDataTypes.put("Unemployed", TypeInference.DataType.NUMERICAL); headerDataTypes.put("Armed.Forces", TypeInference.DataType.NUMERICAL); headerDataTypes.put("Population", TypeInference.DataType.NUMERICAL); headerDataTypes.put("Year", TypeInference.DataType.NUMERICAL); Dataset trainingDataset = Dataset.Builder.parseCSVFile(fileReader, "Employed", headerDataTypes, ',', '"', "\r\n", dbConf); Dataset testingDataset = trainingDataset.copy(); //Setup Training Parameters //------------------------- Modeler.TrainingParameters trainingParameters = new Modeler.TrainingParameters(); //Model Configuration trainingParameters.setMLmodelClass(NLMS.class); trainingParameters .setMLmodelTrainingParameters(new NLMS.TrainingParameters()); //Set data transfomation configuration trainingParameters .setDataTransformerClass(DummyXYMinMaxNormalizer.class); trainingParameters .setDataTransformerTrainingParameters(new DummyXYMinMaxNormalizer.TrainingParameters()); //Set feature selection configuration trainingParameters.setFeatureSelectionClass(null); trainingParameters.setFeatureSelectionTrainingParameters(null); //Fit the modeler //--------------- Modeler modeler = new Modeler("LaborStatistics", dbConf); modeler.fit(trainingDataset, trainingParameters); //Use the modeler //--------------- //Get validation metrics on the training set BaseMLmodel.ValidationMetrics vm = modeler .validate(trainingDataset); modeler.setValidationMetrics(vm); //store them in the model for future reference //Predict a new dataset modeler.predict(testingDataset); System.out.println("Test Results:"); for (Integer rId : testingDataset) { Record r = testingDataset.get(rId); System.out.println("Record " + rId + " - Real Y: " + r.getY() + ", Predicted Y: " + r.getYPredicted()); } System.out.println("Modeler Statistics: " + PHPfunctions.var_export(vm)); //Clean up //-------- //Erase the modeler. This removes all files. modeler.erase(); //Erase datasets. trainingDataset.erase(); testingDataset.erase(); } }