List of usage examples for org.apache.hadoop.mapred FileInputFormat setInputPaths
public static void setInputPaths(JobConf conf, Path... inputPaths)
From source file:NaivePageRank.java
License:Apache License
public static void main(String[] args) throws Exception { int iteration = -1; String inputPath = args[0];/*from w w w. ja va2 s. com*/ String outputPath = args[1]; int specIteration = 0; if (args.length > 2) { specIteration = Integer.parseInt(args[2]); } int numNodes = 100000; if (args.length > 3) { numNodes = Integer.parseInt(args[3]); } int numReducers = 32; if (args.length > 4) { numReducers = Integer.parseInt(args[4]); } System.out.println("specified iteration: " + specIteration); long start = System.currentTimeMillis(); /** * job to count out-going links for each url */ JobConf conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Count"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(CountMapper.class); conf.setReducerClass(CountReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/count")); conf.setNumReduceTasks(numReducers); JobClient.runJob(conf); /******************** Initial Rank Assignment Job ***********************/ conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Initialize"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(InitialRankAssignmentMapper.class); conf.setReducerClass(InitialRankAssignmentReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration)); conf.setNumReduceTasks(numReducers); // conf.setIterative(false); JobClient.runJob(conf); iteration++; do { /****************** Join Job ********************************/ conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Join"); conf.setOutputKeyClass(Text.class); // conf.setOutputValueClass(Text.class); conf.setMapperClass(ComputeRankMap.class); conf.setReducerClass(ComputeRankReduce.class); conf.setMapOutputKeyClass(TextPair.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setPartitionerClass(FirstPartitioner.class); conf.setOutputKeyComparatorClass(KeyComparator.class); conf.setOutputValueGroupingComparator(GroupComparator.class); // relation table FileInputFormat.setInputPaths(conf, new Path(inputPath)); // rank table FileInputFormat.addInputPath(conf, new Path(outputPath + "/i" + (iteration - 1))); // count table FileInputFormat.addInputPath(conf, new Path(outputPath + "/count")); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration)); conf.setNumReduceTasks(numReducers); JobClient.runJob(conf); iteration++; /******************** Rank Aggregate Job ***********************/ conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Aggregate"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(Text.class); conf.setMapperClass(RankAggregateMapper.class); conf.setReducerClass(RankAggregateReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(outputPath + "/i" + (iteration - 1))); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration)); conf.setNumReduceTasks(numReducers); conf.setInt("haloop.num.nodes", numNodes); JobClient.runJob(conf); iteration++; } while (iteration < 2 * specIteration); long end = System.currentTimeMillis(); System.out.println("running time " + (end - start) / 1000 + "s"); }
From source file:IndexWords.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { return -1; }/* w w w . j a v a 2 s. co m*/ checkWords = new String[args.length - 2]; int numIter = 5; Path input = new Path(args[0]); for (int i = 0; i < numIter; i++) { JobConf conf = new JobConf(getConf(), IndexWords.class); conf.setJobName("indexwords"); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); FileInputFormat.setInputPaths(conf, input); FileOutputFormat.setOutputPath(conf, new Path(args[1] + Integer.toString(i))); RunningJob rj = JobClient.runJob(conf); input = new Path(args[1] + Integer.toString(i)); double resVal = rj.getCounters().getCounter(RecordCounters.RESIDUAL_COUNTER) * 1.0 / 10000; System.out.println(N + " " + (resVal / (1.0 * N))); if (resVal / (1.0 * N) < 0.001) break; } return 0; }
From source file:ClimateData.java
License:Open Source License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(ClimateData.class); conf.setJobName("climatedata"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);// w ww .java 2 s . co m }
From source file:FormatStorage2ColumnStorageMR.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { if (args.length != 2) { System.out.println("FormatStorage2ColumnStorageMR <input> <output>"); System.exit(-1);/*from w w w . j a v a 2 s . com*/ } JobConf conf = new JobConf(FormatStorageMR.class); conf.setJobName("FormatStorage2ColumnStorageMR"); conf.setNumMapTasks(1); conf.setNumReduceTasks(4); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setMapperClass(FormatStorageMapper.class); conf.setReducerClass(ColumnStorageReducer.class); conf.setInputFormat(FormatStorageInputFormat.class); conf.set("mapred.output.compress", "flase"); Head head = new Head(); initHead(head); head.toJobConf(conf); FileInputFormat.setInputPaths(conf, args[0]); Path outputPath = new Path(args[1]); FileOutputFormat.setOutputPath(conf, outputPath); FileSystem fs = outputPath.getFileSystem(conf); fs.delete(outputPath, true); JobClient jc = new JobClient(conf); RunningJob rj = null; rj = jc.submitJob(conf); String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rj.isComplete()) { try { Thread.sleep(1000); } catch (InterruptedException e) { } int mapProgress = Math.round(rj.mapProgress() * 100); int reduceProgress = Math.round(rj.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.out.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } System.exit(0); }
From source file:TestColumnStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException { try {//from ww w . j av a2 s . c om if (argv.length != 2) { System.out.println("TestColumnStorageInputFormat <input> idx"); System.exit(-1); } JobConf conf = new JobConf(TestColumnStorageInputFormat.class); conf.setJobName("TestColumnStorageInputFormat"); conf.setNumMapTasks(1); conf.setNumReduceTasks(1); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setInputFormat(TextInputFormat.class); conf.set("mapred.output.compress", "flase"); conf.set("mapred.input.dir", argv[0]); conf.set("hive.io.file.readcolumn.ids", argv[1]); FormatStorageSerDe serDe = initSerDe(conf); StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); FileInputFormat.setInputPaths(conf, argv[0]); Path outputPath = new Path(argv[1]); FileOutputFormat.setOutputPath(conf, outputPath); InputFormat inputFormat = new ColumnStorageInputFormat(); long begin = System.currentTimeMillis(); InputSplit[] inputSplits = inputFormat.getSplits(conf, 1); long end = System.currentTimeMillis(); System.out.println("getsplit delay " + (end - begin) + " ms"); if (inputSplits.length == 0) { System.out.println("inputSplits is empty"); return; } else { System.out.println("get Splits:" + inputSplits.length); } int size = inputSplits.length; System.out.println("getSplits return size:" + size); for (int i = 0; i < size; i++) { ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i]; System.out.printf("split:" + i + " offset:" + split.getStart() + "len:" + split.getLength() + "path:" + split.getPath().toString() + "beginLine:" + split.getBeginLine() + "endLine:" + split.getEndLine()); if (split.getFileName() != null) { System.out.println("fileName:" + split.getFileName()); } else { System.out.println("fileName null"); } if (split.fileList() != null) { System.out.println("fileList.num:" + split.fileList().size()); for (int j = 0; j < split.fileList().size(); j++) { System.out.println("filelist " + j + ":" + split.fileList().get(j)); } } } while (true) { int totalDelay = 0; RecordReader<WritableComparable, Writable> currRecReader = null; for (int i = 0; i < inputSplits.length; i++) { currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL); WritableComparable key; Writable value; key = currRecReader.createKey(); value = currRecReader.createValue(); begin = System.currentTimeMillis(); int count = 0; while (currRecReader.next(key, value)) { Record record = (Record) value; Object row = serDe.deserialize(record); count++; } end = System.currentTimeMillis(); long delay = (end - begin) / 1000; totalDelay += delay; System.out.println(count + " record read over, delay " + delay + " s"); } System.out.println("total delay:" + totalDelay + "\n"); } } catch (Exception e) { e.printStackTrace(); System.out.println("get exception:" + e.getMessage()); } }
From source file:BP.java
License:Apache License
protected JobConf configInitMessage() throws Exception { final JobConf conf = new JobConf(getConf(), BP.class); conf.set("nstate", "" + nstate); conf.set("compat_matrix_str", "" + edge_potential_str); conf.setJobName("BP_Init_Belief"); conf.setMapperClass(MapInitializeBelief.class); fs.delete(message_cur_path, true);/* www. j av a 2 s. co m*/ FileInputFormat.setInputPaths(conf, edge_path); FileOutputFormat.setOutputPath(conf, message_cur_path); conf.setNumReduceTasks(0); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); return conf; }
From source file:BP.java
License:Apache License
protected JobConf configSumErr() throws Exception { final JobConf conf = new JobConf(getConf(), BP.class); conf.set("nstate", "" + nstate); conf.setJobName("BP_Sum Err"); fs.delete(sum_error_path, true);//from w w w. ja v a 2 s . c om conf.setMapperClass(MapSumErr.class); conf.setReducerClass(RedSumErr.class); FileInputFormat.setInputPaths(conf, check_error_path); FileOutputFormat.setOutputPath(conf, sum_error_path); conf.setNumReduceTasks(1); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return conf; }
From source file:FriendsJob.java
License:Apache License
/** * @param args//w w w .j a v a 2 s.c o m */ public static void main(String[] args) throws Exception { JobConf conf = new JobConf(FriendsJob.class); conf.setJobName("anagramcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(FriendsMapper.class); // conf.setCombinerClass(AnagramReducer.class); conf.setReducerClass(FriendsReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:AnagramJob.java
License:Apache License
/** * @param args/* w ww . ja va2 s . c o m*/ */ public static void main(String[] args) throws Exception { JobConf conf = new JobConf(AnagramJob.class); conf.setJobName("anagramcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(AnagramMapper.class); // conf.setCombinerClass(AnagramReducer.class); conf.setReducerClass(AnagramReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:CountHistogram.java
License:Open Source License
@Override public int run(String[] args) throws Exception { try {//from w w w . java2 s .c o m JobClient client = new JobClient(); JobConf job = new JobConf(getConf(), CountHistogram.class); job.setJobName("CountHistogram"); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); client.setConf(job); JobClient.runJob(job); } catch (Exception e) { e.printStackTrace(); throw e; } return 0; }