List of usage examples for org.apache.hadoop.mapred FileInputFormat setInputPaths
public static void setInputPaths(JobConf conf, Path... inputPaths)
From source file:Text2FormatStorageMR.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { if (args.length != 2) { System.out.println("FormatFileMR <input> <output>"); System.exit(-1);/* w w w .jav a 2 s.c o m*/ } JobConf conf = new JobConf(FormatStorageMR.class); conf.setJobName("Text2FormatMR"); conf.setNumMapTasks(1); conf.setNumReduceTasks(4); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setMapperClass(TextFileTestMapper.class); conf.setReducerClass(FormatFileTestReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(FormatStorageOutputFormat.class); conf.set("mapred.output.compress", "flase"); Head head = new Head(); initHead(head); head.toJobConf(conf); FileInputFormat.setInputPaths(conf, args[0]); Path outputPath = new Path(args[1]); FileOutputFormat.setOutputPath(conf, outputPath); FileSystem fs = outputPath.getFileSystem(conf); fs.delete(outputPath, true); JobClient jc = new JobClient(conf); RunningJob rj = null; rj = jc.submitJob(conf); String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rj.isComplete()) { try { Thread.sleep(1000); } catch (InterruptedException e) { } int mapProgress = Math.round(rj.mapProgress() * 100); int reduceProgress = Math.round(rj.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.out.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } System.exit(0); }
From source file:TestFormatStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException { try {//from w w w . j a va 2 s . c o m if (argv.length != 2) { System.out.println("TestFormatStorageInputFormat <input> <output>"); System.exit(-1); } JobConf conf = new JobConf(TestFormatStorageInputFormat.class); conf.setJobName("TestFormatStorageInputFormat"); conf.setNumMapTasks(1); conf.setNumReduceTasks(1); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(FormatStorageOutputFormat.class); conf.set("mapred.output.compress", "flase"); conf.set("mapred.input.dir", argv[0]); Head head = new Head(); initHead(head); head.toJobConf(conf); FormatStorageSerDe serDe = initSerDe(conf); StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); FileInputFormat.setInputPaths(conf, argv[0]); Path outputPath = new Path(argv[1]); FileOutputFormat.setOutputPath(conf, outputPath); InputFormat inputFormat = new FormatStorageInputFormat(); InputSplit[] inputSplits = inputFormat.getSplits(conf, 1); if (inputSplits.length == 0) { System.out.println("inputSplits is empty"); return; } else { System.out.println("get Splits:" + inputSplits.length); } int size = inputSplits.length; System.out.println("getSplits return size:" + size); for (int i = 0; i < size; i++) { FormatStorageSplit split = (FormatStorageSplit) inputSplits[i]; System.out.printf("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength() + "path:" + conf.get(ConstVar.InputPath) + "beginLine:" + split.getBeginLine() + "endLine:" + split.getEndLine() + "\n"); } { int totalDelay = 0; RecordReader<WritableComparable, Writable> currRecReader = null; for (int i = 0; i < inputSplits.length; i++) { currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL); WritableComparable key; Writable value; key = currRecReader.createKey(); value = currRecReader.createValue(); long begin = System.currentTimeMillis(); int count = 0; while (currRecReader.next(key, value)) { Record record = (Record) value; Object row = serDe.deserialize(record); count++; } long end = System.currentTimeMillis(); long delay = (end - begin) / 1000; totalDelay += delay; System.out.println(count + " record read over, delay " + delay + " s"); } System.out.println("total delay:" + totalDelay); } } catch (Exception e) { e.printStackTrace(); System.out.println("get exception:" + e.getMessage()); } }
From source file:BMTColumnLoader.java
License:Apache License
public int run(String[] args) { JobConf conf = new JobConf(getConf(), BMTColumnLoader.class); GenericOptionsParser parser = new GenericOptionsParser(conf, args); conf.setJobName("BMTColumnLoader"); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from ww w . j a v a 2 s.c o m conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { other_args.add(args[i]); } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); try { JobClient.runJob(conf); } catch (IOException e) { throw new RuntimeException(e); } return 0; }
From source file:Text2ColumntStorageMR.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { if (args.length != 3) { System.out.println("Text2ColumnStorageMR <input> <output> <columnStorageMode>"); System.exit(-1);// w w w. j av a 2 s . c o m } JobConf conf = new JobConf(Text2ColumntStorageMR.class); conf.setJobName("Text2ColumnStorageMR"); conf.setNumMapTasks(1); conf.setNumReduceTasks(4); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setMapperClass(TextFileMapper.class); conf.setReducerClass(ColumnStorageReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat((Class<? extends OutputFormat>) ColumnStorageHiveOutputFormat.class); conf.set("mapred.output.compress", "flase"); Head head = new Head(); initHead(head); head.toJobConf(conf); int bt = Integer.valueOf(args[2]); FileInputFormat.setInputPaths(conf, args[0]); Path outputPath = new Path(args[1]); FileOutputFormat.setOutputPath(conf, outputPath); FileSystem fs = outputPath.getFileSystem(conf); fs.delete(outputPath, true); JobClient jc = new JobClient(conf); RunningJob rj = null; rj = jc.submitJob(conf); String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rj.isComplete()) { try { Thread.sleep(1000); } catch (InterruptedException e) { } int mapProgress = Math.round(rj.mapProgress() * 100); int reduceProgress = Math.round(rj.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.out.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } System.exit(0); }
From source file:TestTextInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException { try {/*from ww w.jav a2 s .co m*/ if (argv.length != 2) { System.out.println("TestTextInputFormat <input> <output>"); System.exit(-1); } JobConf conf = new JobConf(TestTextInputFormat.class); conf.setJobName("TestTextInputFormat"); conf.setNumMapTasks(1); conf.setNumReduceTasks(1); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(FormatStorageOutputFormat.class); conf.set("mapred.output.compress", "flase"); conf.set("mapred.input.dir", argv[0]); LazySimpleSerDe serDe = initSerDe(conf); LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); FileInputFormat.setInputPaths(conf, argv[0]); Path outputPath = new Path(argv[1]); FileOutputFormat.setOutputPath(conf, outputPath); InputFormat inputFormat = new TextInputFormat(); ((TextInputFormat) inputFormat).configure(conf); InputSplit[] inputSplits = inputFormat.getSplits(conf, 1); if (inputSplits.length == 0) { System.out.println("inputSplits is empty"); return; } else { System.out.println("get Splits:" + inputSplits.length); } int totalDelay = 0; RecordReader<WritableComparable, Writable> currRecReader = null; for (int i = 0; i < inputSplits.length; i++) { currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL); WritableComparable key; Writable value; key = currRecReader.createKey(); value = currRecReader.createValue(); long begin = System.currentTimeMillis(); int count = 0; while (currRecReader.next(key, value)) { Object row = serDe.deserialize((Text) value); oi.getStructFieldsDataAsList(row); count++; } long end = System.currentTimeMillis(); long delay = (end - begin) / 1000; totalDelay += delay; System.out.println(count + " record read over, delay " + delay + " s"); } System.out.println("total delay:" + totalDelay); return; } catch (Exception e) { e.printStackTrace(); System.out.println("get exception:" + e.getMessage()); } }
From source file:MapReduceRunner.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf conf = new JobConf(MapReduceRunner.class); conf.setJobName("gapdeduce"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Mapper.class); conf.setReducerClass(Reducer.class); // KeyValueTextInputFormat treats each line as an input record, // and splits the line by the tab character to separate it into key and value conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w . j av a2 s. com*/ }
From source file:LinkReverser.java
License:Apache License
/** * The main driver for word count map/reduce program. * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker.// w ww . j a v a 2 s .c om */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), LinkReverser.class); conf.setJobName("indexreverser"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MapClass.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:update_sentiment.java
License:LGPL
public static void runjob(String input, String output) throws Exception { JobConf conf = new JobConf(update_sentiment.class); conf.setJobName("Update_Sentiment_Train"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf);/*from w w w .j ava2 s . com*/ }
From source file:BMTTableLoader.java
License:Apache License
public int run(String[] args) { JobConf conf = new JobConf(getConf(), BMTTableLoader.class); GenericOptionsParser parser = new GenericOptionsParser(conf, args); conf.setJobName("BMTTableLoader"); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from w w w .java 2 s .co m conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { other_args.add(args[i]); } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); try { JobClient.runJob(conf); } catch (IOException e) { throw new RuntimeException(e); } return 0; }
From source file:BMTKeyValueLoader.java
License:Apache License
public int run(String[] args) { JobConf conf = new JobConf(getConf(), CassandraTableLoader.class); GenericOptionsParser parser = new GenericOptionsParser(conf, args); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setJobName("BMTKeyValueLoader"); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { other_args.add(args[i]);/* w ww . j a v a2 s. co m*/ } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); try { JobClient.runJob(conf); } catch (IOException e) { throw new RuntimeException(e); } return 0; }