Example usage for org.apache.hadoop.mapred FileInputFormat setInputPaths

Introduction

On this page you can find example usages for org.apache.hadoop.mapred FileInputFormat setInputPaths.

Prototype

public static void setInputPaths(JobConf conf, Path... inputPaths) 

Document

Set the array of Paths as the list of inputs for the map-reduce job.
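
For orientation, here is a minimal, self-contained sketch of a typical call using the classic org.apache.hadoop.mapred API. The class name and the paths "/data/in1", "/data/in2", and "/data/out" are hypothetical placeholders, not taken from the examples below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SetInputPathsSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(SetInputPathsSketch.class);
        job.setInputFormat(TextInputFormat.class);

        // The varargs overload replaces any previously configured input
        // paths with exactly the paths given here (placeholder paths).
        FileInputFormat.setInputPaths(job, new Path("/data/in1"), new Path("/data/in2"));

        FileOutputFormat.setOutputPath(job, new Path("/data/out"));
        // ...configure mapper/reducer classes, then submit, e.g. with
        // JobClient.runJob(job);
    }
}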

Usage

From source file: org.apache.tez.mapreduce.input.TestMultiMRInput.java

License: Apache License

@Test(timeout = 5000)
public void testExtraEvents() throws Exception {
    Path workDir = new Path(TEST_ROOT_DIR, "testExtraEvents");
    JobConf jobConf = new JobConf(defaultConf);
    jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(jobConf, workDir);

    MRInputUserPayloadProto.Builder builder = MRInputUserPayloadProto.newBuilder();
    builder.setGroupingEnabled(false);
    builder.setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf));
    byte[] payload = builder.build().toByteArray();

    InputContext inputContext = createTezInputContext(payload);

    MultiMRInput input = new MultiMRInput(inputContext, 1);
    input.initialize();
    List<Event> eventList = new ArrayList<Event>();

    String file1 = "file1";
    createInputData(localFs, workDir, jobConf, file1, 0, 10);
    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(jobConf, 1);
    assertEquals(1, splits.length);

    MRSplitProto splitProto = MRInputHelpers.createSplitProto(splits[0]);
    InputDataInformationEvent event1 = InputDataInformationEvent.createWithSerializedPayload(0,
            splitProto.toByteString().asReadOnlyByteBuffer());
    InputDataInformationEvent event2 = InputDataInformationEvent.createWithSerializedPayload(1,
            splitProto.toByteString().asReadOnlyByteBuffer());

    // MultiMRInput was constructed with a single physical input, so
    // sending two InputDataInformationEvents is one event too many.
    eventList.clear();
    eventList.add(event1);
    eventList.add(event2);
    try {
        input.handleEvents(eventList);
        fail("Expecting Exception due to too many events");
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unexpected event. All physical sources already initialized"));
    }
}

From source file: org.apache.tez.mapreduce.processor.MapUtils.java

License: Apache License

private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);

    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
            + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; "
            + "file = " + ((FileSplit) splits[0]).getPath());
    return splits[0];
}

From source file: org.apache.trevni.avro.TestWordCount.java

License: Apache License

public void testOutputFormat() throws Exception {
    JobConf job = new JobConf();

    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");

    wordCountUtil.writeLinesFile();

    AvroJob.setInputSchema(job, STRING);
    AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));

    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setCombinerClass(job, ReduceImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);

    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);

    job.setOutputFormat(AvroTrevniOutputFormat.class);

    JobClient.runJob(job);

    wordCountUtil.validateCountsFile();
}

From source file: org.apache.trevni.avro.TestWordCount.java

License: Apache License

public void testInputFormat() throws Exception {
    JobConf job = new JobConf();

    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");

    Schema subSchema = Schema.parse("{\"type\":\"record\"," + "\"name\":\"PairValue\"," + "\"fields\": [ "
            + "{\"name\":\"value\", \"type\":\"long\"}" + "]}");
    AvroJob.setInputSchema(job, subSchema);
    AvroJob.setMapperClass(job, Counter.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormat(AvroTrevniInputFormat.class);

    job.setNumReduceTasks(0); // map-only
    job.setOutputFormat(NullOutputFormat.class); // ignore output

    total = 0;
    JobClient.runJob(job);
    assertEquals(WordCountUtil.TOTAL, total);
}

From source file: org.apache.whirr.demo.WhirrHadoop.java

License: Apache License

public void runJob() throws Exception {
    Configuration conf = getConfiguration();

    JobConf job = new JobConf(conf, WhirrHadoop.class);
    JobClient client = new JobClient(job);
    waitForTaskTrackers(client);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);
    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    reader.close();
}

From source file: org.apache.whirr.examples.HadoopClusterExample.java

License: Apache License

private void runWordCountingJob(Configuration config) throws IOException {
    JobConf job = new JobConf(config, HadoopClusterExample.class);

    FileSystem fs = FileSystem.get(config);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    LOG.info("Wrote a file containing 'b a\\n'");

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));

    String line = reader.readLine();
    int count = 0;
    while (line != null) {
        LOG.info("Line {}: {}", count, line);
        count += 1;
        line = reader.readLine();
    }
    reader.close();
}

From source file: org.apache.whirr.service.cdh.integration.Cdh3HadoopServiceTest.java

License: Apache License

protected void checkHadoop(Configuration conf, JobClient client, JobConf job) throws Exception {
    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}

From source file: org.apache.whirr.service.hadoop.HadoopServiceTest.java

License: Apache License

@Test
public void test() throws Exception {
    Configuration conf = getConfiguration();

    JobConf job = new JobConf(conf, HadoopServiceTest.class);
    JobClient client = new JobClient(job);
    waitForTaskTrackers(client);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}

From source file: org.apache.whirr.service.hadoop.integration.AbstractHadoopServiceTest.java

License: Apache License

@Test(timeout = TestConstants.ITEST_TIMEOUT)
public void test() throws Exception {
    Configuration conf = controller.getConfiguration();
    JobConf job = new JobConf(conf, HadoopServiceTest.class);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}

From source file: org.apache.whirr.service.hadoop.integration.HadoopServiceTest.java

License: Apache License

@Test
public void test() throws Exception {
    Configuration conf = controller.getConfiguration();
    JobConf job = new JobConf(conf, HadoopServiceTest.class);

    FileSystem fs = FileSystem.get(conf);

    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    JobClient.runJob(job);

    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}