Example usage for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.globStatus, collected from open-source projects.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Returns all the files that match pathPattern and are not checksum files.
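
Before the full examples, a minimal sketch of typical usage. The glob pattern, directory layout, and default configuration below are illustrative assumptions, not taken from the sources that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical glob pattern; substitute a path that exists in your cluster.
        Path pattern = new Path("/data/logs/2024-*/part-*");
        FileSystem fs = pattern.getFileSystem(conf);
        FileStatus[] matches = fs.globStatus(pattern);
        // globStatus returns null if a non-glob path does not exist, and an
        // empty array if a glob matches nothing, so guard both cases.
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}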

Usage

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroReflectOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ReflectStatsReducer.class);
    AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new ReflectDatumReader<ReflectStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (ReflectStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroInput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString()));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, TextStats.SCHEMA$);

    job.setMapperClass(StatCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(SpecificStatsReducer.class);
    AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific-input");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testReflectInput() throws Exception {
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString()));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setMapperClass(ReflectCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ReflectStatsReducer.class);
    AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect-input");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new ReflectDatumReader<ReflectStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (ReflectStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

/**
 * Tests the MR output to text files when using AvroKey and AvroValue records.
 */
@Test
public void testAvroUsingTextFileOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(AvroSumReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    job.setOutputFormatClass(TextOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-text");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    Path filePath = outputFiles[0].getPath();
    InputStream inputStream = filePath.getFileSystem(job.getConfiguration()).open(filePath);
    Assert.assertNotNull(inputStream);
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
    try {
        Assert.assertTrue(reader.ready());
        Assert.assertEquals("apple\t3", reader.readLine());
        Assert.assertEquals("banana\t2", reader.readLine());
        Assert.assertEquals("carrot\t1", reader.readLine());
        Assert.assertFalse(reader.ready());
    } finally {
        reader.close();
    }
}

From source file:org.apache.beam.sdk.io.hdfs.HDFSFileSource.java

License:Apache License

@Override
public void validate() {
    if (validateSource()) {
        try {
            UGIHelper.getBestUGI(username()).doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    final Path pathPattern = new Path(filepattern());
                    FileSystem fs = FileSystem.get(pathPattern.toUri(),
                            SerializableConfiguration.newConfiguration(serializableConfiguration()));
                    FileStatus[] fileStatuses = fs.globStatus(pathPattern);
                    checkState(fileStatuses != null && fileStatuses.length > 0,
                            "Unable to find any files matching %s", filepattern());
                    return null;
                }
            });
        } catch (IOException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    }
}
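
Note the guard in the snippet above: globStatus returns null when a non-glob path does not exist and an empty array when a glob pattern matches nothing, so checking both fileStatuses != null and fileStatuses.length > 0 is what lets the source report "Unable to find any files matching" in either case.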

From source file:org.apache.carbondata.hadoop.internal.segment.Segment.java

License:Apache License

/**
 * Returns all InputSplits of this segment; each file becomes one InputSplit.
 * @param job job context
 * @return all InputSplits of this segment
 * @throws IOException
 */
public List<InputSplit> getAllSplits(JobContext job) throws IOException {
    List<InputSplit> result = new ArrayList<>();
    Path p = new Path(path);
    FileSystem fs = p.getFileSystem(job.getConfiguration());

    //TODO: filter out the hidden files
    FileStatus[] files = fs.globStatus(p);
    if (files != null) { // globStatus returns null if a non-glob path does not exist
        for (FileStatus file : files) {
            // make an InputSplit for each matched file and add it to result
        }
    }
    return result;
}
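
The TODO in the snippet above (skipping hidden files) could be expressed with the two-argument globStatus(Path, PathFilter) overload. A hedged sketch follows; the underscore/dot naming test is the common Hadoop convention for hidden output files and is an assumption here, not CarbonData's actual filter:

// Sketch only: accepts paths whose final component does not look hidden.
PathFilter hiddenFileFilter = new PathFilter() {
    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        return !name.startsWith("_") && !name.startsWith(".");
    }
};
// Same glob expansion as in getAllSplits, with hidden files filtered out.
FileStatus[] visibleFiles = fs.globStatus(p, hiddenFileFilter);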

From source file:org.apache.crunch.impl.mr.exec.CrunchJob.java

License:Apache License

private synchronized void handleMultiPaths() throws IOException {
    if (!multiPaths.isEmpty()) {
        // Need to handle moving the data from the output directory of the
        // job to the output locations specified in the paths.
        FileSystem fs = FileSystem.get(job.getConfiguration());
        for (int i = 0; i < multiPaths.size(); i++) {
            Path src = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + i + "-*");
            Path[] srcs = FileUtil.stat2Paths(fs.globStatus(src), src);
            Path dst = multiPaths.get(i);
            if (!fs.exists(dst)) {
                fs.mkdirs(dst);
            }
            int minPartIndex = getMinPartIndex(dst, fs);
            for (Path s : srcs) {
                fs.rename(s, getDestFile(s, dst, minPartIndex++));
            }
        }
    }
}

From source file:org.apache.crunch.io.impl.FileTargetImpl.java

License:Apache License

@Override
public void handleOutputs(Configuration conf, Path workingPath, int index) throws IOException {
    FileSystem srcFs = workingPath.getFileSystem(conf);
    Path src = getSourcePattern(workingPath, index);
    Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(src), src);
    FileSystem dstFs = path.getFileSystem(conf);
    if (!dstFs.exists(path)) {
        dstFs.mkdirs(path);
    }
    boolean sameFs = isCompatible(srcFs, path);
    for (Path s : srcs) {
        Path d = getDestFile(conf, s, path, s.getName().contains("-m-"));
        if (sameFs) {
            srcFs.rename(s, d);
        } else {
            FileUtil.copy(srcFs, s, dstFs, d, true, true, conf);
        }
    }
    dstFs.create(getSuccessIndicator(), true).close();
}

From source file:org.apache.crunch.io.SourceTargetHelper.java

License:Apache License

public static long getPathSize(FileSystem fs, Path path) throws IOException {
    FileStatus[] stati = fs.globStatus(path);
    if (stati == null || stati.length == 0) {
        return -1L;
    }
    long size = 0;
    for (FileStatus status : stati) {
        if (status.isDir()) {
            for (FileStatus st : fs.listStatus(status.getPath())) {
                size += st.getLen();
            }
        } else {
            size += status.getLen();
        }
    }
    return size;
}

From source file:org.apache.crunch.io.SourceTargetHelper.java

License:Apache License

public static long getLastModifiedAt(FileSystem fs, Path path) throws IOException {
    FileStatus[] stati = fs.globStatus(path);
    if (stati == null || stati.length == 0) {
        return -1L;
    }
    long lastMod = -1;
    for (FileStatus status : stati) {
        if (lastMod < status.getModificationTime()) {
            lastMod = status.getModificationTime();
        }
    }
    return lastMod;
}