Java tutorial
/* file: Kmeans.java */ /* // Copyright(C) 2014-2015 Intel Corporation. All Rights Reserved. // // The source code, information and material ("Material") contained herein is // owned by Intel Corporation or its suppliers or licensors, and title to such // Material remains with Intel Corporation or its suppliers or licensors. The // Material contains proprietary information of Intel or its suppliers and // licensors. The Material is protected by worldwide copyright laws and treaty // provisions. No part of the Material may be used, copied, reproduced, // modified, published, uploaded, posted, transmitted, distributed or disclosed // in any way without Intel's prior express written permission. No license // under any patent, copyright or other intellectual property rights in the // Material is granted to or conferred upon you, either expressly, by // implication, inducement, estoppel or otherwise. Any license under such // intellectual property rights must be express and approved by Intel in // writing. // // *Third Party trademarks are the property of their respective owners. // // Unless otherwise agreed by Intel in writing, you may not remove or alter // this notice or any other notice embedded in Materials by Intel or Intel's // suppliers or licensors in any way. // //////////////////////////////////////////////////////////////////////////////// // Content: // Java sample of K-Means clustering in the distributed processing mode //////////////////////////////////////////////////////////////////////////////// */ package DAAL; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.filecache.DistributedCache; import java.net.URI; import com.intel.daal.data_management.data.*; import com.intel.daal.data_management.data_source.*; import com.intel.daal.services.*; /* Implement Tool to be able to pass -libjars on start */ public class Kmeans extends Configured implements Tool { private static final int nIterations = 5; public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new Kmeans(), args); System.exit(res); } @Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); /* Put shared libraries into the distributed cache */ DistributedCache.createSymlink(conf); DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf); DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf); DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf); Job initJob = new Job(conf, "K-means Init Job"); FileInputFormat.setInputPaths(initJob, new Path(args[0])); FileOutputFormat.setOutputPath(initJob, new Path("/Hadoop/Kmeans/initResults")); initJob.setMapperClass(KmeansInitStep1Mapper.class); initJob.setReducerClass(KmeansInitStep2Reducer.class); initJob.setInputFormatClass(TextInputFormat.class); initJob.setOutputFormatClass(SequenceFileOutputFormat.class); initJob.setOutputKeyClass(IntWritable.class); initJob.setOutputValueClass(WriteableData.class); initJob.setJarByClass(Kmeans.class); initJob.waitForCompletion(true); int a = 1; for (int i = 0; i < nIterations; i++) { Job job = new Job(conf, "K-means compute Job"); FileInputFormat.setInputPaths(job, new Path(args[0])); if (i == nIterations - 1) { FileOutputFormat.setOutputPath(job, new Path(args[1])); } else { FileOutputFormat.setOutputPath(job, new Path("/Hadoop/Kmeans/ResultsIter" + i)); } job.setMapperClass(KmeansStep1Mapper.class); job.setReducerClass(KmeansStep2Reducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(WriteableData.class); job.setJarByClass(Kmeans.class); if (job.waitForCompletion(true)) { a = 0; } else { a = 1; } ; } return a; } }