List of usage examples for org.apache.hadoop.mapreduce.Job.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
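Before the full examples below, here is a minimal sketch (not taken from any of the source files listed) of the typical reason for calling setMapOutputValueClass: the mapper's value type differs from the job's final output value type, so the map-side types must be declared explicitly. Per the signature above, the call throws IllegalStateException if the job has already been submitted. Class names and paths here are illustrative only.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountExample {

    // Mapper emits (Text, IntWritable): one count per token.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), ONE);
                }
            }
        }
    }

    // Reducer emits (Text, Text): the final value type differs from the map output value type.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new Text("count=" + sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count example");
        job.setJarByClass(WordCountExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);

        // Map output types differ from the final output types, so both pairs
        // must be declared. Without setMapOutputValueClass, the framework
        // would assume the map output value type equals the final output
        // value type (Text) and fail at runtime on the IntWritable values.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}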
From source file:ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set.
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // Set filter.
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // If no.strict is false, then check the column families.
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}
From source file:ReverseIndexer.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: ReverseIndexer <output> <input file(s)>"); System.exit(2);//w ww .j a v a 2 s . c o m } Job job = new Job(conf, "reverse indexer"); job.setJarByClass(ReverseIndexer.class); job.setMapperClass(IndexerMapper.class); job.setReducerClass(IndexerReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LineRecWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 1; i < otherArgs.length; i++) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[0])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Authset.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);/* ww w . j ava 2 s .c o m*/ } Job job = new Job(conf, "word count"); job.setJarByClass(Authset.class); job.setMapperClass(TokenizerMapper.class); //job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(NullWritable.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setNumReduceTasks(10); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); //DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), //job.getConfiguration()); //DistributedCache.setLocalFiles(job.getConfiguration(), otherArgs[0]); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:DumpRecordsExtended.java
License:Apache License
/** * Runs this tool.//w w w. jav a 2 s.c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName()); LOG.info(" - input: " + inputPath); LOG.info(" - output: " + outputPath); Configuration conf = new Configuration(); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); Job job = Job.getInstance(conf); job.setJobName(DumpRecordsExtended.class.getSimpleName()); job.setJarByClass(DumpRecordsExtended.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:StripesPMI_nocombiner.java
License:Apache License
/** * Runs this tool.//w w w. j a va2 s .c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT) + "_TMP";// cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + StripesPMI_nocombiner.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Job job_first = Job.getInstance(getConf()); job_first.setJobName(StripesPMI_nocombiner.class.getSimpleName()); job_first.setJarByClass(StripesPMI_nocombiner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job_first.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_first, new Path(inputPath)); FileOutputFormat.setOutputPath(job_first, new Path(outputPath)); job_first.setMapOutputKeyClass(Text.class); job_first.setMapOutputValueClass(String2IntOpenHashMapWritable.class); job_first.setOutputKeyClass(PairOfStrings.class);// Text.class);// PairOfStrings.class); job_first.setOutputValueClass(DoubleWritable.class); job_first.setOutputFormatClass(TextOutputFormat.class);// changed job_first.setMapperClass(MyMapper_first.class); // job_first.setCombinerClass(MyCombiner.class); job_first.setReducerClass(MyReducer_first.class); long startTime = System.currentTimeMillis(); job_first.waitForCompletion(true); // ////////////////START.: run the second MR job to just aggregate result//////////////// inputPath = outputPath;// cmdline.getOptionValue(INPUT); outputPath = cmdline.getOptionValue(OUTPUT); Job job_second = Job.getInstance(getConf()); job_second.setJobName(StripesPMI_nocombiner.class.getSimpleName()); job_second.setJarByClass(StripesPMI_nocombiner.class); // Delete the output directory if it exists already. 
outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job_second.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_second, new Path(inputPath)); FileOutputFormat.setOutputPath(job_second, new Path(outputPath)); job_second.setMapOutputKeyClass(Text.class); job_second.setMapOutputValueClass(DoubleWritable.class); job_second.setOutputKeyClass(Text.class);// PairOfStrings.class); job_second.setOutputValueClass(DoubleWritable.class); // job_second.setOutputFormatClass(TextOutputFormat.class);// changed job_second.setMapperClass(MyMapper_second.class); // job_second.setCombinerClass(MyCombiner.class); job_second.setReducerClass(MyReducer_second.class); job_second.waitForCompletion(true); // END//////////// System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:BuildInvertedIndex.java
License:Apache License
/** * Runs this tool.//from w w w.j a v a2s . c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BuildInvertedIndex.class.getSimpleName()); job.setJarByClass(BuildInvertedIndex.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(PairOfInts.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(PairOfWritables.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:RunPageRankSchimmy.java
License:Apache License
private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws Exception {
    Configuration conf = getConf();

    String in = path + "/iter" + FORMAT.format(i);
    String out = path + "/iter" + FORMAT.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    conf.setInt("NodeCount", n);

    Partitioner<IntWritable, Writable> p = null;
    if (useRange) {
        p = new RangePartitioner();
        ((Configurable) p).setConf(conf);
    } else {
        p = new HashPartitioner<IntWritable, Writable>();
    }

    // This is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(f.getPath()));

        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();

        LOG.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + ";");
    }

    LOG.info(sb.toString().trim());

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + n);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInmapCombiner);
    LOG.info(" - numPartitions: " + numPartitions);
    LOG.info(" - useRange: " + useRange);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    // conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        job.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        job.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}
From source file:RunPageRankSchimmy.java
License:Apache License
private void phase2(String path, int i, int j, int n, float missing) throws Exception {
    Configuration conf = getConf();

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + n);

    String in = path + "/iter" + FORMAT.format(j) + "t";
    String out = path + "/iter" + FORMAT.format(j);

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    conf.setFloat("MissingMass", (float) missing);
    conf.setInt("NodeCount", n);

    FileSystem.get(conf).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
From source file:PartitionGraph.java
License:Apache License
/** * Runs this tool.//from w w w . j a v a 2s. c om */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNodeMultiSrc.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNodeMultiSrc.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:Hw2Part1.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: <input file> <output directory>"); System.exit(2);//from w w w . ja v a 2 s.c o m } // FileSystem hdfs = FileSystem.get(conf); String target = "hdfs://localhost:9000/"; FileSystem fs = FileSystem.get(URI.create(target), conf);//is diffrent Path outputpath = new Path(otherArgs[otherArgs.length - 1]); if (fs.exists(outputpath)) { fs.delete(outputpath, true); } Job job = Job.getInstance(conf, "Hw2Part1"); job.setJarByClass(Hw2Part1.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumCombiner.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(InfoWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(InfoWritable.class); // add the input paths as given by command line for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } // add the output path as given by the command line FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }