List of usage examples for org.apache.hadoop.mapred FileInputFormat setInputPaths
public static void setInputPaths(JobConf conf, Path... inputPaths)
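setInputPaths replaces any input paths previously configured on the JobConf with the Path varargs supplied. A minimal sketch of typical use, with illustrative class and path names that are not taken from the examples below:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SetInputPathsSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf(SetInputPathsSketch.class);
        conf.setInputFormat(TextInputFormat.class);

        // setInputPaths accepts one or more Paths (varargs) and overwrites
        // any input paths already set on this JobConf.
        FileInputFormat.setInputPaths(conf, new Path("/data/in1"), new Path("/data/in2"));

        // To append to the existing list instead of replacing it,
        // use FileInputFormat.addInputPath(conf, new Path("/data/in3"));
    }
}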
From source file:org.apache.tez.mapreduce.input.TestMultiMRInput.java
License:Apache License
@Test(timeout = 5000)
public void testExtraEvents() throws Exception {
    Path workDir = new Path(TEST_ROOT_DIR, "testExtraEvents");
    JobConf jobConf = new JobConf(defaultConf);
    jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(jobConf, workDir);

    MRInputUserPayloadProto.Builder builder = MRInputUserPayloadProto.newBuilder();
    builder.setGroupingEnabled(false);
    builder.setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf));
    byte[] payload = builder.build().toByteArray();

    InputContext inputContext = createTezInputContext(payload);
    MultiMRInput input = new MultiMRInput(inputContext, 1);
    input.initialize();

    List<Event> eventList = new ArrayList<Event>();
    String file1 = "file1";
    createInputData(localFs, workDir, jobConf, file1, 0, 10);
    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(jobConf, 1);
    assertEquals(1, splits.length);

    MRSplitProto splitProto = MRInputHelpers.createSplitProto(splits[0]);
    InputDataInformationEvent event1 = InputDataInformationEvent.createWithSerializedPayload(0,
        splitProto.toByteString().asReadOnlyByteBuffer());
    InputDataInformationEvent event2 = InputDataInformationEvent.createWithSerializedPayload(1,
        splitProto.toByteString().asReadOnlyByteBuffer());

    eventList.clear();
    eventList.add(event1);
    eventList.add(event2);
    try {
        input.handleEvents(eventList);
        fail("Expecting Exception due to too many events");
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unexpected event. All physical sources already initialized"));
    }
}
From source file:org.apache.tez.mapreduce.processor.MapUtils.java
License:Apache License
private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length
        + "; " + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; "
        + "file = " + ((FileSplit) splits[0]).getPath());
    return splits[0];
}
From source file:org.apache.trevni.avro.TestWordCount.java
License:Apache License
public void testOutputFormat() throws Exception {
    JobConf job = new JobConf();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
    wordCountUtil.writeLinesFile();

    AvroJob.setInputSchema(job, STRING);
    AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setCombinerClass(job, ReduceImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);

    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setOutputFormat(AvroTrevniOutputFormat.class);

    JobClient.runJob(job);

    wordCountUtil.validateCountsFile();
}
From source file:org.apache.trevni.avro.TestWordCount.java
License:Apache License
public void testInputFormat() throws Exception {
    JobConf job = new JobConf();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");

    Schema subSchema = Schema.parse("{\"type\":\"record\"," + "\"name\":\"PairValue\"," + "\"fields\": [ "
        + "{\"name\":\"value\", \"type\":\"long\"}" + "]}");
    AvroJob.setInputSchema(job, subSchema);
    AvroJob.setMapperClass(job, Counter.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormat(AvroTrevniInputFormat.class);
    job.setNumReduceTasks(0); // map-only
    job.setOutputFormat(NullOutputFormat.class); // ignore output

    total = 0;
    JobClient.runJob(job);
    assertEquals(WordCountUtil.TOTAL, total);
}
From source file:org.apache.whirr.demo.WhirrHadoop.java
License:Apache License
public void runJob() throws Exception {
    Configuration conf = getConfiguration();

    JobConf job = new JobConf(conf, WhirrHadoop.class);

    JobClient client = new JobClient(job);
    waitForTaskTrackers(client);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    reader.close();
}
From source file:org.apache.whirr.examples.HadoopClusterExample.java
License:Apache License
private void runWordCountingJob(Configuration config) throws IOException {
    JobConf job = new JobConf(config, HadoopClusterExample.class);

    FileSystem fs = FileSystem.get(config);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();
    LOG.info("Wrote a file containing 'b a\\n'");

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = reader.readLine();
    int count = 0;
    while (line != null) {
        LOG.info("Line {}: {}", count, line);
        count += 1;
        line = reader.readLine();
    }
    reader.close();
}
From source file:org.apache.whirr.service.cdh.integration.Cdh3HadoopServiceTest.java
License:Apache License
protected void checkHadoop(Configuration conf, JobClient client, JobConf job) throws Exception {
    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}
From source file:org.apache.whirr.service.hadoop.HadoopServiceTest.java
License:Apache License
@Test
public void test() throws Exception {
    Configuration conf = getConfiguration();

    JobConf job = new JobConf(conf, HadoopServiceTest.class);

    JobClient client = new JobClient(job);
    waitForTaskTrackers(client);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}
From source file:org.apache.whirr.service.hadoop.integration.AbstractHadoopServiceTest.java
License:Apache License
@Test(timeout = TestConstants.ITEST_TIMEOUT)
public void test() throws Exception {
    Configuration conf = controller.getConfiguration();
    JobConf job = new JobConf(conf, HadoopServiceTest.class);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}
From source file:org.apache.whirr.service.hadoop.integration.HadoopServiceTest.java
License:Apache License
@Test
public void test() throws Exception {
    Configuration conf = controller.getConfiguration();
    JobConf job = new JobConf(conf, HadoopServiceTest.class);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}