// Java tutorial
/* file: LinearRegressionNormEq.java */ /* // Copyright(C) 2014-2015 Intel Corporation. All Rights Reserved. // // The source code, information and material ("Material") contained herein is // owned by Intel Corporation or its suppliers or licensors, and title to such // Material remains with Intel Corporation or its suppliers or licensors. The // Material contains proprietary information of Intel or its suppliers and // licensors. The Material is protected by worldwide copyright laws and treaty // provisions. No part of the Material may be used, copied, reproduced, // modified, published, uploaded, posted, transmitted, distributed or disclosed // in any way without Intel's prior express written permission. No license // under any patent, copyright or other intellectual property rights in the // Material is granted to or conferred upon you, either expressly, by // implication, inducement, estoppel or otherwise. Any license under such // intellectual property rights must be express and approved by Intel in // writing. // // *Third Party trademarks are the property of their respective owners. // // Unless otherwise agreed by Intel in writing, you may not remove or alter // this notice or any other notice embedded in Materials by Intel or Intel's // suppliers or licensors in any way. // //////////////////////////////////////////////////////////////////////////////// // Content: // Java sample of multiple linear regression in the distributed processing // mode. // // The program trains the multiple linear regression model on a training // data set with the normal equations method and computes regression for // the test data. 
//////////////////////////////////////////////////////////////////////////////// */ package DAAL; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.filecache.DistributedCache; import java.net.URI; import com.intel.daal.data_management.data.*; import com.intel.daal.data_management.data_source.*; import com.intel.daal.services.*; /* Implement Tool to be able to pass -libjars on start */ public class LinearRegressionNormEq extends Configured implements Tool { public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new LinearRegressionNormEq(), args); System.exit(res); } @Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); /* Put shared libraries into the distributed cache */ DistributedCache.createSymlink(conf); DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf); DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf); DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf); Job job = new Job(conf, "Linear regression with normal equations method (normEq) Job"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(LinearRegressionNormEqStep1TrainingMapper.class); 
job.setReducerClass(LinearRegressionNormEqStep2TrainingReducerAndPrediction.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(WriteableData.class); job.setJarByClass(LinearRegressionNormEq.class); return job.waitForCompletion(true) ? 0 : 1; } }