Java tutorial
/* # DARPA XDATA licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Copyright 2013 Raytheon BBN Technologies Corp. All Rights Reserved. # */ package jobs; import java.io.*; import org.apache.hadoop.fs.*; import org.apache.hadoop.conf.*; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.*; import org.apache.hadoop.mapreduce.Job; //import com.bbn.fileformats.*; import fileformats.*; //import com.bbn.algebra.hadoop.mappers.*; import mappers.*; //import com.bbn.algebra.hadoop.reducers.* import reducers.*; public class CreateUniformDoublyStochastic extends Configured implements Tool { public int run(String[] args) throws Exception { Configuration conf = getConf(); int N = Integer.parseInt(args[2]); conf.setInt("N", N); int sR = Integer.parseInt(args[3]); conf.setInt("SR", sR); int sC = Integer.parseInt(args[4]); conf.setInt("SC", sC); String delim = args[5]; conf.set("DELIM", delim); conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[6])); conf.set("RESNAME", args[1]); //heap space - should be entered with the -D format and not dealt with by the program. conf.set("mapred.map.child.java.opts", "-Xmx3G"); conf.set("mapred.reduce.child.java.opts", "-Xmx3G"); //Create the File that We are mapping. //open the file in hdfs Path outFile = new Path(args[0]); FileSystem fs = FileSystem.get(conf); FSDataOutputStream out = fs.create(outFile); //write out an entry for the block int nR = N / sR + (N % sR > 0 ? 1 : 0); int nC = N / sC + (N % sC > 0 ? 1 : 0); for (int r = 0; r < nR; r++) { for (int c = 0; c < nC; c++) { out.writeUTF(String.valueOf(r) + delim + String.valueOf(c) + "\n"); } } //close file out.close(); //job Job job1 = new Job(conf, "CreateUniformDoubleStochastic"); job1.setJarByClass(CreateUniformDoublyStochastic.class); // Map FileInputFormat.addInputPath(job1, outFile); job1.setInputFormatClass(TextInputFormat.class); job1.setMapperClass(UniformDoublyStochasticMapper.class); //Reduce job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(MatrixBlock.class); FileOutputFormat.setOutputPath(job1, new Path(args[1])); job1.setOutputFormatClass(SequenceFileOutputFormat.class); //job1.setOutputFormatClass(TextOutputFormat.class); return job1.waitForCompletion(false) ? 0 : 1; } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); int res = ToolRunner.run(conf, new CreateUniformDoublyStochastic(), args); System.exit(res); } }