List of usage examples for org.apache.hadoop.mapreduce Job getInstance
@Deprecated public static Job getInstance(Cluster ignored) throws IOException
From source file:edu.umd.gorden2.StripesPMI.java
License:Apache License
/** * Runs this tool.//w w w . java2 s.c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + StripesPMI.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(StripesPMI.class.getSimpleName()); job.setJarByClass(StripesPMI.class); // Delete the output directory if it exists already. 
Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(HMapStFW.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.addCacheFile(new URI("wc/part-r-00000")); // // wordcount job Job job2 = Job.getInstance(getConf()); job2.setJobName("Wordcount"); job2.setJarByClass(PairsPMI.class); String outputPath2 = "wc"; // Delete the output directory if it exists already. Path outputDir2 = new Path(outputPath2); FileSystem.get(getConf()).delete(outputDir2, true); job2.setNumReduceTasks(1); FileInputFormat.setInputPaths(job2, new Path(inputPath)); FileOutputFormat.setOutputPath(job2, new Path(outputPath2)); job2.setMapOutputKeyClass(Text.class); job2.setMapOutputValueClass(IntWritable.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(IntWritable.class); job2.setMapperClass(MyMapper2.class); job2.setCombinerClass(MyReducer2.class); job2.setReducerClass(MyReducer2.class); long startTime = System.currentTimeMillis(); job2.waitForCompletion(true); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.honghongie.BuildInvertedIndexCompressed.java
License:Apache License
/** * Runs this tool.//from w w w . j av a 2s. c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + BuildInvertedIndexCompressed.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BuildInvertedIndexCompressed.class.getSimpleName()); job.setJarByClass(BuildInvertedIndexCompressed.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(PairOfStringLong.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(PairOfWritables.class); job.setOutputFormatClass(MapFileOutputFormat.class); //why mapfileoutputformat? // job.setOutputFormatClass(SequenceFileOutputFormat); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.honghongie.BuildInvertedIndexHBase.java
License:Apache License
/**
 * Runs this tool: drops and recreates the target HBase table with the column families in
 * {@code FAMILIES}, then executes the inverted-index job with a table reducer writing to it.
 *
 * @param args command-line arguments; the input path and HBase table name options are required
 * @return 0 if the job succeeds, -1 if the arguments cannot be parsed or are incomplete,
 *     1 if the job fails
 * @throws Exception if HBase administration, job setup or job execution throws
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);

    // Recreate the table from scratch: an existing table is disabled and dropped first.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);
    try {
        if (admin.tableExists(outputTable)) {
            LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
            LOG.info(String.format("Disabling table '%s'", outputTable));
            admin.disableTable(outputTable);
            LOG.info(String.format("Droppping table '%s'", outputTable));
            admin.deleteTable(outputTable);
        }

        HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
        for (int i = 0; i < FAMILIES.length; i++) {
            HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
            tableDesc.addFamily(hColumnDesc);
        }
        admin.createTable(tableDesc);
        LOG.info(String.format("Successfully created table '%s'", outputTable));
    } finally {
        // Release the admin connection even when a table operation throws.
        admin.close();
    }

    // Now we are ready to start running mapreduce
    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Propagate job failure to the caller instead of always reporting success.
    return succeeded ? 0 : 1;
}
From source file:edu.umd.honghongie.BuildPersonalizedPageRankRecords.java
License:Apache License
/** * Runs this tool.// www .j a va2 s . com */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("node").hasArg() .withDescription("source node (i.e., destination of the random jump)").create(SOURCE)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(SOURCE)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); String source = cmdline.getOptionValue(SOURCE); //get source information from cmdline LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName()); LOG.info(" - inputDir: " + inputPath); LOG.info(" - outputDir: " + outputPath); LOG.info(" - numNodes: " + n); LOG.info(" - source: " + source); Configuration conf = getConf(); conf.setInt(NODE_CNT_FIELD, n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); conf.set(SOURCE_NODES, source); //set source node and pass it to mapper setup Job job = Job.getInstance(conf); 
job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath); job.setJarByClass(BuildPersonalizedPageRankRecords.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:edu.umd.honghongie.ExtractTopPersonalizedPageRankNodes.java
License:Apache License
private void iterateSort(int i, int source, int n, String input, String output) throws Exception { Configuration conf = getConf(); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); conf.setInt("n", n); conf.setInt("times", i); conf.setInt("source", source); Job job = Job.getInstance(conf); job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getName() + ":" + input); job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class); job.setNumReduceTasks(1);/*from w ww . j a va 2 s . co m*/ FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output + Integer.toString(i))); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(FloatWritable.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(output + Integer.toString(i)), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); }
From source file:edu.umd.honghongie.PairsPMI.java
License:Apache License
/** * Runs this tool./*w ww . ja v a 2 s. c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); // options.addOption(OptionBuilder.withArgName("num").hasArg() // .withDescription("window size").create(WINDOW)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; // int window = cmdline.hasOption(WINDOW) ? 
// Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2; LOG.info("Tool: " + PairsPMI.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); // LOG.info(" - window: " + window); LOG.info(" - number of reducers: " + reduceTasks); //JobConf conf = new JobConf(PairsPMI.class); // first job //Job job1 = new Job (conf,"join1"); Configuration conf1 = getConf(); Job job1 = Job.getInstance(conf1); job1.setJobName(PairsPMI.class.getSimpleName()); job1.setJarByClass(PairsPMI.class); job1.setNumReduceTasks(1); //ensure go to one file //file path of job1 // Delete the output directory if it exist Path dir = new Path("temp"); FileSystem.get(getConf()).delete(dir, true); FileInputFormat.setInputPaths(job1, new Path(inputPath)); FileOutputFormat.setOutputPath(job1, new Path("temp")); job1.setMapperClass(Map_First.class); job1.setCombinerClass(MyCombiner.class); job1.setReducerClass(Reduce_First.class); job1.setMapOutputKeyClass(Text.class);//map output key job1.setMapOutputValueClass(IntWritable.class);//map output value job1.setOutputKeyClass(Text.class);//reduce output key job1.setOutputValueClass(IntWritable.class);//reduce output value // ControlledJob ctrljob1=new ControlledJob(conf); // ctrljob1.setJob(job1); long startTime1 = System.currentTimeMillis(); job1.waitForCompletion(true); System.out.println( "First Job Finished in " + (System.currentTimeMillis() - startTime1) / 1000.0 + " seconds"); //begin job2 //Configuration conf2 = getConf(); Job job2 = Job.getInstance(getConf()); job2.setJobName(PairsPMI.class.getSimpleName()); job2.setJarByClass(PairsPMI.class); job2.setNumReduceTasks(reduceTasks); //delete the output directory if it exists. 
Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); //file path of job2 FileInputFormat.setInputPaths(job2, new Path(inputPath)); FileOutputFormat.setOutputPath(job2, new Path(outputPath)); job2.addCacheFile(new URI("temp/part-r-00000")); job2.setMapperClass(Map_Second.class); job2.setCombinerClass(MyCombiner_Second.class); job2.setReducerClass(Reduce_Second.class); job2.setMapOutputKeyClass(PairOfStrings.class);//map output key job2.setMapOutputValueClass(FloatWritable.class);//map output value job2.setOutputKeyClass(PairOfStrings.class);//reduce output key job2.setOutputValueClass(FloatWritable.class);//reduce output value long startTime2 = System.currentTimeMillis(); job2.waitForCompletion(true); System.out.println( "Second Job Finished in " + (System.currentTimeMillis() - startTime2) / 1000.0 + " seconds"); System.out.println( "Total Job Finished in " + (System.currentTimeMillis() - startTime1) / 1000.0 + " seconds"); System.out.println("Total number of lines:" + lines); return 0; }
From source file:edu.umd.honghongie.PartitionGraph.java
License:Apache License
/** * Runs this tool./*from w ww . j a v a 2 s . c om*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); 
job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); // job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(PageRankNode.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:edu.umd.honghongie.RunPersonalizedPageRankBasic.java
License:Apache License
private ArrayListOfFloats phase1(int i, int j, String basePath, int numNodes, ArrayListOfInts sourceids, boolean useCombiner) throws Exception { Job job = Job.getInstance(getConf()); job.setJobName("PageRank:Basic:iteration" + j + ":Phase1"); job.setJarByClass(RunPersonalizedPageRankBasic.class); String in = basePath + "/iter" + formatter.format(i); String out = basePath + "/iter" + formatter.format(j) + "t"; String outm = out + "-mass"; // We need to actually count the number of part files to get the number of partitions (because // the directory might contain _log). int numPartitions = 0; for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) { if (s.getPath().getName().contains("part-")) numPartitions++;/*from w ww . ja va2 s . c o m*/ } LOG.info("PageRank: iteration " + j + ": Phase1"); LOG.info(" - input: " + in); LOG.info(" - output: " + out); LOG.info(" - nodeCnt: " + numNodes); LOG.info(" - useCombiner: " + useCombiner); LOG.info("computed number of partitions: " + numPartitions); int numReduceTasks = numPartitions; job.getConfiguration().setInt("NodeCount", numNodes); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); //job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m"); job.getConfiguration().set("PageRankMassPath", outm); //*********************** reduer uses sourcenode job.getConfiguration().set("SourceNode", sourceids.toString()); job.setNumReduceTasks(numReduceTasks); FileInputFormat.setInputPaths(job, new Path(in)); FileOutputFormat.setOutputPath(job, new Path(out)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); 
job.setMapperClass(MapClass.class); if (useCombiner) { job.setCombinerClass(CombineClass.class); } job.setReducerClass(ReduceClass.class); FileSystem.get(getConf()).delete(new Path(out), true); FileSystem.get(getConf()).delete(new Path(outm), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); System.out.println("********** 1 *********"); ArrayListOfFloats mass = new ArrayListOfFloats(); int length = sourceids.size(); System.out.println("*********** 1 **********" + length); float test = Float.NEGATIVE_INFINITY; for (int k = 0; k < length; k++) { mass.add(Float.NEGATIVE_INFINITY); //use add to initialize } System.out.println("********** test ********" + test); System.out.println("******** 1 ********" + mass); //****************************************** how to resolve datastream FileSystem fs = FileSystem.get(getConf()); ArrayListOfFloatsWritable invalue = new ArrayListOfFloatsWritable(); for (FileStatus f : fs.listStatus(new Path(outm))) { FSDataInputStream fin = fs.open(f.getPath()); //************************************** get all values from fin? invalue.readFields(fin); System.out.println("************** 1 ************" + invalue); for (int k = 0; k < invalue.size(); k++) { mass.set(k, sumLogProbs(mass.get(k), invalue.get(k))); } fin.close(); } System.out.println("******** 1 ********" + mass.toString()); return mass; }
From source file:edu.umd.honghongie.RunPersonalizedPageRankBasic.java
License:Apache License
private void phase2(int i, int j, ArrayListOfFloats missing, String basePath, int numNodes, ArrayListOfInts sourceids) throws Exception { System.out.println("************ start working in phase2***********"); Job job = Job.getInstance(getConf()); job.setJobName("PageRank:Basic:iteration" + j + ":Phase2"); job.setJarByClass(RunPersonalizedPageRankBasic.class); LOG.info("missing PageRank mass: " + missing.toString()); LOG.info("number of nodes: " + numNodes); //********// www . j a va 2 s. c om LOG.info("source node: " + sourceids.toString()); String in = basePath + "/iter" + formatter.format(j) + "t"; String out = basePath + "/iter" + formatter.format(j); LOG.info("PageRank: iteration " + j + ": Phase2"); LOG.info(" - input: " + in); LOG.info(" - output: " + out); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().set("MissingMass", missing.toString()); job.getConfiguration().setInt("NodeCount", numNodes); //********* job.getConfiguration().set("SourceNode", sourceids.toString()); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(in)); FileOutputFormat.setOutputPath(job, new Path(out)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); job.setMapperClass(MapPageRankMassDistributionClass.class); FileSystem.get(getConf()).delete(new Path(out), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); }
From source file:edu.umd.honghongie.StripesPMI.java
License:Apache License
/** * Runs this tool.//from w w w . j a v a 2s .c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); // options.addOption(OptionBuilder.withArgName("num").hasArg() // .withDescription("window size").create(WINDOW)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; // int window = cmdline.hasOption(WINDOW) ? 
// Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2; LOG.info("Tool: " + StripesPMI.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); // LOG.info(" - window: " + window); LOG.info(" - number of reducers: " + reduceTasks); //JobConf conf = new JobConf(PairsPMI.class); // first job //Job job1 = new Job (conf,"join1"); Configuration conf1 = getConf(); Job job1 = Job.getInstance(conf1); job1.setJobName(StripesPMI.class.getSimpleName()); job1.setJarByClass(StripesPMI.class); job1.setNumReduceTasks(1); //file path of job1 // Delete the output directory if it exist Path dir = new Path("temp"); FileSystem.get(getConf()).delete(dir, true); FileInputFormat.setInputPaths(job1, new Path(inputPath)); FileOutputFormat.setOutputPath(job1, new Path("temp")); job1.setMapperClass(Map_First.class); job1.setCombinerClass(MyCombiner.class); job1.setReducerClass(Reduce_First.class); job1.setMapOutputKeyClass(Text.class);//map output key job1.setMapOutputValueClass(IntWritable.class);//map output value job1.setOutputKeyClass(Text.class);//reduce output key job1.setOutputValueClass(IntWritable.class);//reduce output value // ControlledJob ctrljob1=new ControlledJob(conf); // ctrljob1.setJob(job1); long startTime1 = System.currentTimeMillis(); job1.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime1) / 1000.0 + " seconds"); //begin job2 //Configuration conf2 = getConf(); Job job2 = Job.getInstance(getConf()); job2.setJobName(StripesPMI.class.getSimpleName()); job2.setJarByClass(StripesPMI.class); job2.setNumReduceTasks(reduceTasks); //delete the output directory if it exists. 
Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); //file path of job2 FileInputFormat.setInputPaths(job2, new Path(inputPath)); FileOutputFormat.setOutputPath(job2, new Path(outputPath)); job2.addCacheFile(new URI("temp/part-r-00000")); job2.setMapperClass(Map_Second.class); job2.setReducerClass(Reduce_Second.class); job2.setMapOutputKeyClass(Text.class);//map output key job2.setMapOutputValueClass(HMapStIW.class);//map output value job2.setOutputKeyClass(PairOfStrings.class);//reduce output key job2.setOutputValueClass(FloatWritable.class);//reduce output value long startTime2 = System.currentTimeMillis(); job2.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime2) / 1000.0 + " seconds"); System.out .println("Total Job Finished in" + (System.currentTimeMillis() - startTime1) / 1000.0 + " seconds"); System.out.println("total number of lines:" + lines); return 0; }