List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException
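Typical usage: call setOutputKeyClass (together with setOutputValueClass) while configuring the job, before it is submitted; calling it on a job that has already been submitted throws IllegalStateException. A minimal sketch of a driver, assuming hypothetical MyDriver, MyMapper, and MyReducer classes that are not taken from the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "my job");   // hypothetical job name
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);          // hypothetical mapper
        job.setReducerClass(MyReducer.class);        // hypothetical reducer

        // Declare the classes of the job's final output key/value pairs.
        // Must be called before submission; a submitted job throws IllegalStateException.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}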
From source file:com.knewton.mrtool.example.JsonMRExample.java
License:Apache License
/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(new Configuration());

    job.setInputFormatClass(RecommendationsInputFormat.class);
    RecommendationsInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(RecommendationWritable.class);

    job.waitForCompletion(true);
}
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     * Merge the source files into one.
     * Change the directories of each file before executing the program.
     **********************************************************************************/
    // String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    // String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    // File resultFile = new File(resultFileDirectory);
    // if (!resultFile.exists())
    //     new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/
    Configuration conf = new Configuration();

    // Compress intermediate map output with Snappy.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable profiling
    // conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;
    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {
        /*
         * Mandatory options
         */
        // Extract the input path from the command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract the output path from the command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract the test data path from the command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional options
         */
        // Extract the number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether the job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether the job uses mean or median [default: mean].
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";

    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    // Test sequence
    // String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    // Sequence testSeq = new Sequence(testSeqString);
    // LinkedList<Sequence> testSequences = new LinkedList<>();
    // testSequences.add(testSeq);

    for (Sequence seq : testSequences) {
        /*
         * Hadoop launch
         */
        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think of another way to implement the combiner class;
        // the current implementation is not helpful to the job.
        // job.setCombinerClass(Combiner.class);

        // Use a single reduce task so the 100 nearest neighbors are kept in one sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * When the job finishes, read its result and store it in the results list.
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * When all jobs have finished, write the collected results to output/result.csv.
     */
    String finalOutputPath = "output/result.csv";

    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV file header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.laizuozuoba.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);

    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}
From source file:com.lakhani.anchorgraph.anchorgraph.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // DistributedCache.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"), conf);
    conf.set("numberCentroids", args[3]);
    conf.set("numberFeatures", args[4]);
    Job job = new Job(conf, "anchorgraph");
    job.addCacheFile(new URI(args[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(anchorgraph.class);
    job.submit();
    int rc = (job.waitForCompletion(true)) ? 1 : 0;
    return rc;
}
From source file:com.lakhani.anchorgraph.testCache.java
public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "testCache"); job.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt")); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJarByClass(testCache.class); job.submit();//from w ww. j ava2s . com int rc = (job.waitForCompletion(true)) ? 1 : 0; return rc; }
From source file:com.lakhani.anchorgraph.wordcount.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "wordcount"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setJarByClass(wordcount.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true);//from w w w. j a va 2 s . c o m }
From source file:com.leon.hadoop.loganalyse.DistributedGrep.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.leon.hadoop.loganalyse.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.nsrl.HashLoader.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 6) {
        System.err.println("Usage: HashLoader <mfgfile> <osfile> <prodfile> <hashfile> <outpath> <num_reducers>");
        System.exit(2);
    }

    final String mfg_filename = otherArgs[0];
    final String os_filename = otherArgs[1];
    final String prod_filename = otherArgs[2];
    final String hash_filename = otherArgs[3];
    final String output_filename = otherArgs[4];

    conf.set("mfg_filename", mfg_filename);
    conf.set("os_filename", os_filename);
    conf.set("prod_filename", prod_filename);
    conf.setLong("timestamp", System.currentTimeMillis());
    SKJobFactory.addDependencies(conf);

    final Job job = new Job(conf, "HashLoader");
    job.setJarByClass(HashLoader.class);
    job.setMapperClass(HashLoaderMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[5]));

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    TextInputFormat.addInputPath(job, new Path(hash_filename));
    HFileOutputFormat.setOutputPath(job, new Path(output_filename));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.spectrum.BlockHasher.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 3) { System.err.println("Usage: BlockHasher <imageID> <image> <output>"); return 2; }/* w w w .ja v a2s .c o m*/ final String imageID = args[0]; final String image = args[1]; final String output = args[2]; Configuration conf = getConf(); final Job job = SKJobFactory.createJobFromConf(imageID, image, "BlockHasher", conf); job.setJarByClass(BlockHasher.class); job.setMapperClass(BlockHashMapper.class); // job.setReducerClass(Reducer.class); job.setNumReduceTasks(0); // job ctor copies the Configuration we pass it, get the real one conf = job.getConfiguration(); conf.setLong("timestamp", System.currentTimeMillis()); job.setInputFormatClass(RawFileInputFormat.class); RawFileInputFormat.addInputPath(job, new Path(image)); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(MD5Hash.class); FileOutputFormat.setOutputPath(job, new Path(output)); conf.setInt("mapred.job.reuse.jvm.num.tasks", -1); return job.waitForCompletion(true) ? 0 : 1; }