List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance
@Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException
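The examples below come from different projects, but they all follow the same basic pattern around Job.getInstance (most use the Job.getInstance(Configuration conf, String jobName) overload rather than the deprecated Cluster variant above). As a point of reference, here is a minimal, self-contained sketch of that pattern; it uses Hadoop's identity Mapper and Reducer and takes the input and output paths from the command line, so none of the class names below are taken from the projects that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalGetInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Create the job from a configuration plus a human-readable job name.
        Job job = Job.getInstance(conf, "minimal getInstance example");
        job.setJarByClass(MinimalGetInstanceExample.class);

        // Identity mapper and reducer: records pass through unchanged.
        // With the default TextInputFormat the keys are byte offsets (LongWritable)
        // and the values are lines of text (Text).
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations come from the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Block until the job finishes and exit with its status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The project examples that follow swap in their own mappers, reducers, input/output formats, and job-control logic, but the Job.getInstance call itself stays the same.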
From source file:com.mycompany.keywordsearch.KeywordSearch.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(FileInputFormat.INPUT_DIR_RECURSIVE, String.valueOf(true));
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    System.out.print("Keyword:\t");
    conf.set(KEYWORD, in.readLine());

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(KeywordSearch.class);
    job.setInputFormatClass(TextInputFormatV2.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    clearOutput(conf, output);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.mycompany.searcher.Searcher.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    System.out.print("Please input a keyword:\t");
    conf.set(KEYWORD, in.readLine());
    conf.set(MINIMUM, args[2]);

    Job job = Job.getInstance(conf, "keyword search");
    job.setJarByClass(Searcher.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    clearOutput(conf, new Path(args[1]));
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
    TimeUnit.SECONDS.sleep(1);

    in = new BufferedReader(
            new InputStreamReader(FileSystem.get(conf).open(new Path(args[1] + "/part-r-00000")), "UTF-8"));
    String line;
    HashMap<String, Integer> map = new HashMap<String, Integer>();
    while ((line = in.readLine()) != null) {
        StringTokenizer tok = new StringTokenizer(line);
        map.put(tok.nextToken(), Integer.valueOf(tok.nextToken()));
    }

    List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
    Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
        public int compare(Map.Entry<String, Integer> entry1, Map.Entry<String, Integer> entry2) {
            return (entry2.getValue() - entry1.getValue());
        }
    });

    for (Map.Entry<String, Integer> entry : list) {
        in = new BufferedReader(
                new InputStreamReader(FileSystem.get(conf).open(new Path(entry.getKey())), "UTF-8"));
        System.out.println("\n" + in.readLine());
        System.out.println("\n" + in.readLine() + ":" + entry.getValue() + "\n");
    }
}
From source file:com.netflix.bdp.s3.TestMRJob.java
License:Apache License
@Test
public void testMRJob() throws Exception {
    FileSystem mockS3 = mock(FileSystem.class);
    FileSystem s3 = S3_OUTPUT_PATH.getFileSystem(getConfiguration());
    if (s3 instanceof MockS3FileSystem) {
        ((MockS3FileSystem) s3).setMock(mockS3);
    } else {
        throw new RuntimeException("Cannot continue: S3 not mocked");
    }

    String commitUUID = UUID.randomUUID().toString();

    int numFiles = 3;
    Set<String> expectedFiles = Sets.newHashSet();
    for (int i = 0; i < numFiles; i += 1) {
        File file = temp.newFile(String.valueOf(i) + ".text");
        try (FileOutputStream out = new FileOutputStream(file)) {
            out.write(("file " + i).getBytes(StandardCharsets.UTF_8));
        }
        expectedFiles.add(new Path(S3_OUTPUT_PATH, "part-m-0000" + i + "-" + commitUUID).toString());
    }

    Job mrJob = Job.getInstance(MR_CLUSTER.getConfig(), "test-committer-job");
    Configuration conf = mrJob.getConfiguration();

    mrJob.setOutputFormatClass(S3TextOutputFormat.class);
    S3TextOutputFormat.setOutputPath(mrJob, S3_OUTPUT_PATH);

    File mockResultsFile = temp.newFile("committer.bin");
    mockResultsFile.delete();
    String committerPath = "file:" + mockResultsFile;
    conf.set("mock-results-file", committerPath);
    conf.set(UPLOAD_UUID, commitUUID);

    mrJob.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(mrJob, new Path("file:" + temp.getRoot().toString()));

    mrJob.setMapperClass(M.class);
    mrJob.setNumReduceTasks(0);

    mrJob.submit();
    Assert.assertTrue("MR job should succeed", mrJob.waitForCompletion(true));

    TestUtil.ClientResults results;
    try (ObjectInputStream in = new ObjectInputStream(
            FileSystem.getLocal(conf).open(new Path(committerPath)))) {
        results = (TestUtil.ClientResults) in.readObject();
    }

    Assert.assertEquals("Should not delete files", 0, results.deletes.size());
    Assert.assertEquals("Should not abort commits", 0, results.aborts.size());
    Assert.assertEquals("Should commit task output files", numFiles, results.commits.size());

    Set<String> actualFiles = Sets.newHashSet();
    for (CompleteMultipartUploadRequest commit : results.commits) {
        actualFiles.add("s3://" + commit.getBucketName() + "/" + commit.getKey());
    }

    Assert.assertEquals("Should commit the correct file paths", expectedFiles, actualFiles);
}
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();

    // First job: UserIdMapper with IntSumReducer as combiner and reducer.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: reads the first job's output.
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // Wrap each Job in a ControlledJob.
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    // job2 must wait for job1 to finish.
    controlledJob2.addDependingJob(controlledJob1);

    // Group both jobs under a JobControl.
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // Run the JobControl in its own thread and poll until all jobs finish.
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
    }
}
From source file:com.niuwa.hadoop.jobs.sample.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    args = new String[] { "hdfs://192.168.101.219:9000/user/root/input",
            "hdfs://192.168.101.219:9000/user/root/output/count" + new Date().getTime() };
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.pinterest.terrapin.hadoop.examples.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    TerrapinUploaderOptions options = TerrapinUploaderOptions.initFromSystemProperties();

    // Create the job, setting the inputs and map output key and map output value classes.
    // Also, set reducer and mapper.
    Job job = Job.getInstance(super.getConf(), "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));

    // Wrap around Hadoop Loader job to write the data to a terrapin fileset.
    return new HadoopJobLoader(options, job).waitForCompletion() ? 0 : 1;
}
From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java
License:Open Source License
/**
 * This method assumes fs.default.name is passed as args[0].
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");
    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");

    job.setNumReduceTasks(1);
    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }

    return jobSuccess ? 0 : 1;
}
From source file:com.rw.legion.DefaultJob.java
License:Apache License
/**
 * Main method.
 *
 * @param args Arguments should be: 1) input path, 2) output path, 3)
 * location of Legion objective file.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Load the Legion objective from the JSON doc.
    Path path = new Path(args[2]);
    FileSystem fs = FileSystem.get(new URI(args[2]), conf);
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));

    String json = "";
    String line = br.readLine();
    while (line != null) {
        json += line;
        line = br.readLine();
    }
    br.close();

    /*
     * Save the JSON for the Legion objective to the Hadoop configuration,
     * so we can access it in other containers.
     */
    conf.setStrings("legion_objective", json);

    // De-serialize the objective so we can access the settings here.
    LegionObjective legionObjective = ObjectiveDeserializer.deserialize(json);

    // Start configuring the MapReduce job.
    Job hadoopJob = Job.getInstance(conf, "Legion");
    hadoopJob.setJarByClass(DefaultJob.class);
    hadoopJob.setMapperClass(DefaultMapper.class);
    LazyOutputFormat.setOutputFormatClass(hadoopJob, TextOutputFormat.class);

    // Compress the output to speed things up.
    TextOutputFormat.setCompressOutput(hadoopJob, true);
    TextOutputFormat.setOutputCompressorClass(hadoopJob, GzipCodec.class);

    // What input format do we use?
    try {
        @SuppressWarnings("unchecked")
        Class<? extends FileInputFormat<NullWritable, LegionRecord>> inputClass =
                (Class<? extends FileInputFormat<NullWritable, LegionRecord>>) Class
                        .forName(legionObjective.getInputFormat());
        hadoopJob.setInputFormatClass(inputClass);
    } catch (Exception e) {
        throw new JsonParseException("Problem loading input format class '"
                + legionObjective.getInputFormat() + "'");
    }

    // Should we set a max combined size?
    if (legionObjective.getMaxCombinedSize() != null) {
        CombineFileInputFormat.setMaxInputSplitSize(hadoopJob, legionObjective.getMaxCombinedSize());
    }

    /*
     * These are just static convenience methods, so it doesn't matter if
     * they come from the wrong class.
     */
    FileInputFormat.setInputDirRecursive(hadoopJob, true);
    FileInputFormat.addInputPath(hadoopJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(hadoopJob, new Path(args[1]));

    // A Legion objective can specify multiple output tables; register each one as a named output.
    for (OutputTable outputTable : legionObjective.getOutputTables()) {
        MultipleOutputs.addNamedOutput(hadoopJob, outputTable.getTitle(), TextOutputFormat.class,
                NullWritable.class, Text.class);
    }

    MultipleOutputs.addNamedOutput(hadoopJob, "skipped", TextOutputFormat.class, NullWritable.class,
            Text.class);

    hadoopJob.waitForCompletion(true);
}
From source file:com.sa.npopa.samples.hbase.FindBadMOBReferences.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(FindBadMOBReferences.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    scan.addFamily(Bytes.toBytes("J"));
    //scan.setRowPrefixFilter(Bytes.toBytes("a00"));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // job.setOutputFormatClass(NullOutputFormat.class);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, FindBadMOBReferencesMapper.class, Text.class,
            Text.class, job);

    //job.setNumReduceTasks(0);
    job.setReducerClass(FindBadMOBReferencesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));

    return job;
}
From source file:com.sa.npopa.samples.hbase.myMR.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(myMR.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    //scan.setFilter(new FirstKeyOnlyFilter()); // need to find another filter like key only.
    scan.setFilter(new KeyOnlyFilter());

    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);

    return job;
}