Usage examples for org.apache.hadoop.mapreduce.Job#setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
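Before the project examples below, here is a minimal self-contained sketch of the call in context. The class WordLengthCount and its inner mapper/reducer are hypothetical, written for illustration rather than taken from any of the projects listed. setMapOutputValueClass is needed whenever the map output value type differs from the final output value type; if it is never called, the map output value class defaults to the job's output value class.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordLengthCount {

    // emits (word, length) pairs: the map output value type is IntWritable
    public static class LengthMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, new IntWritable(token.length()));
            }
        }
    }

    // sums the lengths per word; the final output value type is also IntWritable
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word-length-count");
        job.setJarByClass(WordLengthCount.class);
        job.setMapperClass(LengthMapper.class);
        job.setReducerClass(SumReducer.class);
        // must be called before the job is submitted; afterwards the job is no
        // longer in the DEFINE state and the setter throws IllegalStateException
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}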
From source file: com.telefonica.iot.tidoop.mrlib.jobs.Filter.java
License: Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String regex = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_REGEX, regex);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-filter");
    job.setNumReduceTasks(1);
    job.setJarByClass(Filter.class);
    job.setMapperClass(LineFilter.class);
    job.setCombinerClass(LinesCombiner.class);
    job.setReducerClass(LinesJoiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java
License: Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String mapFunction = args[2];

    // create and configure a map-only MapReduce job (no reducers)
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_FUNCTION, mapFunction);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly");
    job.setNumReduceTasks(0);
    job.setJarByClass(MapOnly.class);
    job.setMapperClass(CustomMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.test.hadoop.unoExample.CardDriver.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments. Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(CardDriver.class);
    job.setJobName(this.getClass().getName());
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setMapperClass(CardMapper.class);
    job.setReducerClass(CardTotalReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file: com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]);  // num of rows of MatrixA
    int K = Integer.parseInt(args[4]);  // num of rows of MatrixB'
    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }
    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]);  // num of rows of MatrixA
    conf.set("K", args[4]);  // num of rows of MatrixB'
    conf.set("IB", args[5]); // row-block size of MatrixA
    conf.set("KB", args[6]); // row-block size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // a separate mapper for each input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrix A
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrix B
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);
    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);
    System.out.println(success);
}
From source file: com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]);  // num of rows of MatrixA
    int K = Integer.parseInt(args[4]);  // num of rows of MatrixB'
    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }
    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]);  // num of rows of MatrixA
    conf.set("K", args[4]);  // num of rows of MatrixB'
    conf.set("IB", args[5]); // row-block size of MatrixA
    conf.set("KB", args[6]); // row-block size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // a separate mapper for each input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrix A
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrix B
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);
    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);
    System.out.println(success);
}
From source file: com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true");  // transpose
    } else {
        conf.set("transpose", "false"); // no transpose
    }

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(TransformMatrix.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // mapper for the input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // matrix A
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);
    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);
    System.out.println(success);
}
From source file: com.toddbodnar.simpleHadoop.distributedHadoopDriver.java
/**
 * Runs a job.
 *
 * @param theJob the MapReduceJob to be run
 * @param verbose if true, output progress information
 */
public static void run(MapReduceJob theJob, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = GetConfiguration.get();
    Job job = Job.getInstance(conf, theJob.toString());
    job.setJarByClass(distributedHadoopDriver.class);
    job.setMapperClass(theJob.getMapper().getClass());
    job.setReducerClass(theJob.getReducer().getClass());
    job.setMapOutputKeyClass(theJob.getKeyType());
    job.setMapOutputValueClass(theJob.getValueType());
    theJob.writeConfig(job.getConfiguration());

    hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile());
    if (!input.equals(theJob.getInput().getFile())) {
        garbage_collector.noteCreated(input);
    }

    if (theJob.getClass().equals(join.class)) {
        join jobLeftJoin = (join) theJob;
        hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile());
        if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) {
            garbage_collector.noteCreated(input2);
        }
        Mapper maps[] = jobLeftJoin.getMapperPairs();
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass());
        MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass());
    } else {
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class);
    }

    job.getConfiguration().set(TextOutputFormat.SEPERATOR, "");
    job.setOutputFormatClass(TextOutputFormat.class);
    //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation()));
    Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode());
    FileOutputFormat.setOutputPath(job, out);

    boolean success = job.waitForCompletion(true);
    if (!success) {
        System.err.println("Error processing " + theJob);
        return;
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());
    fs.delete(new Path(out, "_SUCCESS"), false);

    table output = new table(new hdfsFile(out), theJob.getOutput().getColNames());
    output.setSeperator(theJob.getOutput().getSeperator());
    theJob.setOutput(output);
    garbage_collector.noteCreated(output.getFile());
}
From source file: com.tomslabs.grid.avro.AvroWordCount.java
License: Apache License
public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {
    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());
    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);

    final Job job = new Job(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);
    job.setInputFormatClass(AvroFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    return job;
}
From source file: com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java
License: Apache License
/**
 * Run a MapReduce job of WordCount.
 *
 * @param args command arguments
 * @return 0 for normal termination of the job and 1 otherwise
 * @throws Exception processing failed
 */
public int run(String[] args) throws Exception {
    GSConf gsConf = new GSConf();
    gsConf.parseArg(args);

    Configuration conf = getConf();
    gsConf.setup(conf);

    Job job = Job.getInstance(conf, APP_NAME);
    job.setJarByClass(GSWordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(GSRowWritable.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(GSRowInputFormat.class);
    job.setOutputFormatClass(GSRowOutputFormat.class);

    int res = job.waitForCompletion(true) ? 0 : 1;
    if (res == 0) {
        printResult(gsConf);
    }
    return res;
}
From source file: com.trexinhca.TrexinHCATest.java
License: Apache License
public static void main(String[] args) throws Exception {
    ks = KieServices.Factory.get();
    kContainer = ks.getKieClasspathContainer();
    ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS");

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJobName("HCATest");
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(TrexinHCAReducer.class);
    job.setReducerClass(TrexinHCAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.setJarByClass(TrexinHCATest.class);
    job.waitForCompletion(true);
}