List of usage examples for org.apache.hadoop.fs FileSystem globStatus
public FileStatus[] globStatus(Path pathPattern) throws IOException
Returns all the files that match pathPattern and are not checksum files.
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testAvroReflectOutput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString())); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(ReflectStatsReducer.class); AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new ReflectDatumReader<ReflectStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (ReflectStats record : reader) { counts.put(record.name.toString(), record.count); }//from www .j a v a 2 s . co m reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testAvroInput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString())); job.setInputFormatClass(AvroKeyInputFormat.class); AvroJob.setInputKeySchema(job, TextStats.SCHEMA$); job.setMapperClass(StatCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(SpecificStatsReducer.class); AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific-input"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<TextStats> reader = new DataFileReader<TextStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new SpecificDatumReader<TextStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (TextStats record : reader) { counts.put(record.name.toString(), record.count); }/*from ww w. j a va2s .c o m*/ reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testReflectInput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString())); job.setInputFormatClass(AvroKeyInputFormat.class); AvroJob.setInputKeySchema(job, REFLECT_STATS_SCHEMA); job.setMapperClass(ReflectCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(ReflectStatsReducer.class); AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect-input"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new ReflectDatumReader<ReflectStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (ReflectStats record : reader) { counts.put(record.name.toString(), record.count); }/*from w w w . jav a 2 s . c o m*/ reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
/** * Tests the MR output to text files when using AvroKey and AvroValue records. *//*w w w . ja v a2 s. c o m*/ @Test public void testAvroUsingTextFileOutput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString())); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(AvroSumReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); job.setOutputFormatClass(TextOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-text"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); Path filePath = outputFiles[0].getPath(); InputStream inputStream = filePath.getFileSystem(job.getConfiguration()).open(filePath); Assert.assertNotNull(inputStream); BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); try { Assert.assertTrue(reader.ready()); Assert.assertEquals("apple\t3", reader.readLine()); Assert.assertEquals("banana\t2", reader.readLine()); Assert.assertEquals("carrot\t1", reader.readLine()); Assert.assertFalse(reader.ready()); } finally { reader.close(); } }
From source file:org.apache.beam.sdk.io.hdfs.HDFSFileSource.java
License:Apache License
/**
 * Checks that the configured file pattern matches at least one file.
 *
 * <p>Does nothing when {@code validateSource()} returns false. Otherwise the lookup runs
 * inside {@code doAs} on the UGI obtained from {@code UGIHelper.getBestUGI(username())}.
 *
 * @throws IllegalStateException presumably raised by {@code checkState} when no file
 *     matches (assumes Guava-style {@code checkState} semantics; confirm against the import)
 * @throws RuntimeException wrapping the {@link IOException} or {@link InterruptedException}
 *     raised by the privileged action
 */
@Override
public void validate() {
  if (!validateSource()) {
    return;
  }
  try {
    UGIHelper.getBestUGI(username()).doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() throws Exception {
        final Path pathPattern = new Path(filepattern());
        FileSystem fs = FileSystem.get(pathPattern.toUri(),
            SerializableConfiguration.newConfiguration(serializableConfiguration()));
        FileStatus[] fileStatuses = fs.globStatus(pathPattern);
        checkState(fileStatuses != null && fileStatuses.length > 0,
            "Unable to find any files matching %s", filepattern());
        return null;
      }
    });
  } catch (InterruptedException e) {
    // Restore the interrupt flag before wrapping so callers further up the stack
    // can still observe that this thread was interrupted.
    Thread.currentThread().interrupt();
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file:org.apache.carbondata.hadoop.internal.segment.Segment.java
License:Apache License
/** * return all InputSplit of this segment, each file is a InputSplit * @param job job context//w ww .j av a 2 s . c o m * @return all InputSplit * @throws IOException */ public List<InputSplit> getAllSplits(JobContext job) throws IOException { List<InputSplit> result = new ArrayList<>(); Path p = new Path(path); FileSystem fs = p.getFileSystem(job.getConfiguration()); //TODO: filter out the hidden files FileStatus[] files = fs.globStatus(p); for (FileStatus file : files) { // make split and add to result } return result; }
From source file:org.apache.crunch.impl.mr.exec.CrunchJob.java
License:Apache License
private synchronized void handleMultiPaths() throws IOException { if (!multiPaths.isEmpty()) { // Need to handle moving the data from the output directory of the // job to the output locations specified in the paths. FileSystem fs = FileSystem.get(job.getConfiguration()); for (int i = 0; i < multiPaths.size(); i++) { Path src = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + i + "-*"); Path[] srcs = FileUtil.stat2Paths(fs.globStatus(src), src); Path dst = multiPaths.get(i); if (!fs.exists(dst)) { fs.mkdirs(dst);//from w w w . java2 s . c o m } int minPartIndex = getMinPartIndex(dst, fs); for (Path s : srcs) { fs.rename(s, getDestFile(s, dst, minPartIndex++)); } } } }
From source file:org.apache.crunch.io.impl.FileTargetImpl.java
License:Apache License
/**
 * Transfers the files matching this target's source pattern under {@code workingPath}
 * into {@code path}, then writes the success indicator file.
 *
 * <p>The destination directory is created if missing. Each matched file is renamed when
 * {@code isCompatible} reports the source file system and {@code path} as compatible, and
 * copied via {@code FileUtil.copy} otherwise.
 *
 * @param conf configuration used to resolve both file systems
 * @param workingPath directory holding the files to transfer
 * @param index output index passed to {@code getSourcePattern}
 * @throws IOException if a file system operation fails
 */
@Override
public void handleOutputs(Configuration conf, Path workingPath, int index) throws IOException {
  final FileSystem sourceFs = workingPath.getFileSystem(conf);
  final Path pattern = getSourcePattern(workingPath, index);
  final Path[] matches = FileUtil.stat2Paths(sourceFs.globStatus(pattern), pattern);

  final FileSystem targetFs = path.getFileSystem(conf);
  if (!targetFs.exists(path)) {
    targetFs.mkdirs(path);
  }

  final boolean canRename = isCompatible(sourceFs, path);
  for (Path match : matches) {
    final boolean nameHasMapMarker = match.getName().contains("-m-");
    final Path target = getDestFile(conf, match, path, nameHasMapMarker);
    if (!canRename) {
      FileUtil.copy(sourceFs, match, targetFs, target, true, true, conf);
      continue;
    }
    sourceFs.rename(match, target);
  }

  // Create (overwriting) the success indicator file and close it immediately.
  targetFs.create(getSuccessIndicator(), true).close();
}
From source file:org.apache.crunch.io.SourceTargetHelper.java
License:Apache License
/**
 * Sums the lengths of everything matched by {@code path}.
 *
 * <p>A matched file contributes its own length. A matched directory contributes the
 * lengths of the entries returned by {@code listStatus} for it; those entries are not
 * descended into.
 *
 * @param fs file system to query
 * @param path path or glob pattern to measure
 * @return the summed length, or {@code -1} when the glob yields null or no matches
 * @throws IOException if a file system operation fails
 */
public static long getPathSize(FileSystem fs, Path path) throws IOException {
  final FileStatus[] matches = fs.globStatus(path);
  final boolean nothingMatched = (matches == null) || (matches.length == 0);
  if (nothingMatched) {
    return -1L;
  }

  long totalLen = 0;
  for (FileStatus match : matches) {
    if (!match.isDir()) {
      totalLen += match.getLen();
      continue;
    }
    for (FileStatus child : fs.listStatus(match.getPath())) {
      totalLen += child.getLen();
    }
  }
  return totalLen;
}
From source file:org.apache.crunch.io.SourceTargetHelper.java
License:Apache License
public static long getLastModifiedAt(FileSystem fs, Path path) throws IOException { FileStatus[] stati = fs.globStatus(path); if (stati == null || stati.length == 0) { return -1L; }//w w w . java2 s. c o m long lastMod = -1; for (FileStatus status : stati) { if (lastMod < status.getModificationTime()) { lastMod = status.getModificationTime(); } } return lastMod; }