List of usage examples for org.apache.hadoop.fs.FileSystem#isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
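Note: this overload is deprecated in current Hadoop releases in favor of looking up a FileStatus and calling FileStatus#isDirectory() on it. The snippet below is a minimal sketch of both forms; the configuration wiring and the path /tmp/example are assumptions added for illustration and do not come from the source files listed further down.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical setup: default configuration and an illustrative path.
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/tmp/example");

        // Deprecated convenience call; returns false when the path does not exist.
        boolean viaDeprecated = fs.isDirectory(p);

        // Preferred replacement: fetch the FileStatus and inspect it.
        // getFileStatus throws FileNotFoundException for a missing path, hence the exists() guard.
        boolean viaFileStatus = fs.exists(p) && fs.getFileStatus(p).isDirectory();

        System.out.println(viaDeprecated + " " + viaFileStatus);
    }
}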
From source file: org.lab41.mapreduce.BlueprintsGraphDriver.java
License: Apache License
private Job configureJob1(Configuration conf, FaunusGraph faunusGraph, Path intermediatePath,
        Configuration job1Config, FileSystem fs) throws IOException {
    /** Job 1 Configuration **/
    Job job1 = new Job(job1Config);
    job1.setJobName("BluePrintsGraphDriver Job1" + faunusGraph.getInputLocation());
    job1.setJarByClass(BlueprintsGraphDriver.class);
    job1.setMapperClass(BlueprintsGraphOutputMapReduce.VertexMap.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Holder.class);
    job1.setReducerClass(BlueprintsGraphOutputMapReduce.Reduce.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(FaunusVertex.class);
    job1.setInputFormatClass(faunusGraph.getGraphInputFormat());
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (fs.isDirectory(intermediatePath)) {
        logger.info("Exists " + intermediatePath + " -- deleting!");
        fs.delete(intermediatePath, true);
    }
    FileOutputFormat.setOutputPath(job1, intermediatePath);

    Path inputPath = faunusGraph.getInputLocation();
    FileInputFormat.setInputPaths(job1, inputPath);

    /***** Figure out how many reducers ********/
    Path[] paths = SequenceFileInputFormat.getInputPaths(job1);
    long splits = HdfsUtil.getNumOfSplitsForInputs(paths, conf, MB);
    // The job is configured with 4 GB of memory.
    job1.setNumReduceTasks((int) Math.ceil(splits / 48));

    return job1;
}
From source file: org.lab41.mapreduce.IdUsingBulkLoaderDriver.java
License: Apache License
private Job configureJob2(Configuration baseConfiguration, FaunusGraph faunusGraph, FileSystem fs)
        throws IOException {
    Configuration job2Config = new Configuration(baseConfiguration);

    /** Job 2 Configuration **/
    Job job2 = new Job(job2Config);
    job2.setInputFormatClass(SequenceFileInputFormat.class);
    job2.setOutputFormatClass(faunusGraph.getGraphOutputFormat());
    job2.setJobName("IdUsingBulkLoader Job2: " + faunusGraph.getInputLocation());
    job2.setJarByClass(IdUsingBulkLoaderDriver.class);
    job2.setMapperClass(IdUsingBulkLoaderMapReduce.EdgeMapper.class);
    job2.setMapOutputKeyClass(NullWritable.class);
    job2.setMapOutputValueClass(FaunusVertex.class);
    FileInputFormat.setInputPaths(job2, faunusGraph.getInputLocation());
    job2.setNumReduceTasks(0);

    String strJob2OutputPath = faunusGraph.getOutputLocation().toString();
    Path job2Path = new Path(strJob2OutputPath + "/job2");
    if (fs.isDirectory(job2Path)) {
        logger.info("Exists " + strJob2OutputPath + " -- deleting");
        fs.delete(job2Path, true);
    }
    FileOutputFormat.setOutputPath(job2, job2Path);

    return job2;
}
From source file: org.lab41.mapreduce.IdUsingBulkLoaderDriver.java
License: Apache License
private Job configureJob1(FaunusGraph faunusGraph, Path intermediatePath, Configuration baseConfiguration,
        FileSystem fs) throws IOException {
    Configuration job1Config = new Configuration(baseConfiguration);

    /** Job 1 Configuration **/
    Job job1 = new Job(job1Config);
    job1.setJobName("IdUsingBulkLoader Job1" + faunusGraph.getInputLocation());
    job1.setJarByClass(IdUsingBulkLoaderDriver.class);
    job1.setMapperClass(IdUsingBulkLoaderMapReduce.VertexMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Holder.class);
    job1.setNumReduceTasks(0);
    job1.setInputFormatClass(faunusGraph.getGraphInputFormat());
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (fs.isDirectory(intermediatePath)) {
        logger.info("Exists " + intermediatePath + " -- deleting!");
        fs.delete(intermediatePath, true);
    }
    FileOutputFormat.setOutputPath(job1, intermediatePath);

    Path inputPath = faunusGraph.getInputLocation();
    FileInputFormat.setInputPaths(job1, inputPath);

    return job1;
}
From source file: org.mrgeo.test.MapOpTestVectorUtils.java
License: Apache License
public void compareVectors(Configuration conf, String testName) throws IOException {
    Path output = new Path(outputHdfs, testName);
    FileSystem fs = HadoopFileUtils.getFileSystem(conf, output);

    Path[] srcFiles;
    if (fs.isDirectory(output)) {
        FileStatus[] files = fs.listStatus(output);
        if (files == null || files.length == 0) {
            Assert.fail("No files found: " + output.toString());
        }
        srcFiles = new Path[files.length];
        int cnt = 0;
        for (FileStatus file : files) {
            srcFiles[cnt++] = file.getPath();
        }
    } else {
        srcFiles = new Path[] { output };
    }

    for (Path file : srcFiles) {
        // read in the output file
        final long l = fs.getFileStatus(file).getLen();
        final byte[] testBuffer = new byte[(int) l];
        final FSDataInputStream fdis = fs.open(file);
        fdis.read(testBuffer);
        fdis.close();

        File baselineVector = new File(inputLocal + testName + "/" + file.getName());
        if (!baselineVector.exists()) {
            Assert.fail("Golden test file missing: " + baselineVector.toString());
        }

        // read in the baseline
        final byte[] baselineBuffer = new byte[(int) baselineVector.length()];
        final FileInputStream fis = new FileInputStream(baselineVector);
        fis.read(baselineBuffer);
        fis.close();

        Assert.assertEquals("Output is different!", new String(baselineBuffer), new String(testBuffer));
    }
}
From source file: org.s1ck.ldbc.LDBCToFlink.java
License: Open Source License
private void initFromHDFS() {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(ldbcDirectory);
        if (!fs.exists(p) || !fs.isDirectory(p)) {
            throw new IllegalArgumentException(
                    String.format("%s does not exist or is not a directory", ldbcDirectory));
        }
        FileStatus[] fileStates = fs.listStatus(p);
        for (FileStatus fileStatus : fileStates) {
            String filePath = fileStatus.getPath().getName();
            if (isVertexFile(filePath)) {
                vertexFilePaths.add(ldbcDirectory + filePath);
            } else if (isEdgeFile(filePath)) {
                edgeFilePaths.add(ldbcDirectory + filePath);
            } else if (isPropertyFile(filePath)) {
                propertyFilePaths.add(ldbcDirectory + filePath);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file: org.talend.components.test.MiniDfsResource.java
License: Open Source License
/**
 * Tests that a file on the HDFS cluster contains the given texts.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected lines in the file (not including terminating end-of-lines).
 */
public static void assertReadFile(FileSystem fs, String path, String... expected) throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(path))))) {
            for (String line : expected)
                assertThat(r.readLine(), is(line));
            assertThat(r.readLine(), nullValue());
        }
    } else if (fs.isDirectory(p)) {
        HashSet<String> expect = new HashSet<>(Arrays.asList(expected));
        for (FileStatus fstatus : fs.listStatus(p)) {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(fstatus.getPath())))) {
                String line = null;
                while (null != (line = r.readLine()))
                    if (!expect.remove(line))
                        fail("Unexpected line: " + line);
            }
        }
        // Check before asserting for the message.
        if (expect.size() != 0)
            assertThat("Not all lines found: " + expect.iterator().next(), expect, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
From source file: org.talend.components.test.MiniDfsResource.java
License: Open Source License
/**
 * Tests that a file on the HDFS cluster contains the given texts.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected lines in the file (not including terminating end-of-lines).
 */
public static void assertReadFile(String recordDelimiter, FileSystem fs, String path, String... expected)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(path))))) {
            Scanner s = new Scanner(r).useDelimiter(recordDelimiter);
            for (String line : expected) {
                assertThat(s.next(), is(line));
            }
            assertThat(s.hasNext(), is(false));
        }
    } else if (fs.isDirectory(p)) {
        HashSet<String> expect = new HashSet<>(Arrays.asList(expected));
        for (FileStatus fstatus : fs.listStatus(p)) {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(fstatus.getPath())))) {
                Scanner s = new Scanner(r).useDelimiter(recordDelimiter);
                String line = null;
                while (s.hasNext()) {
                    line = s.next();
                    if (!expect.remove(line))
                        fail("Unexpected line: " + line);
                }
            }
        }
        // Check before asserting for the message.
        if (expect.size() != 0)
            assertThat("Not all lines found: " + expect.iterator().next(), expect, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
From source file: org.talend.components.test.MiniDfsResource.java
License: Open Source License
/**
 * Tests that a file on the HDFS cluster contains the given avro.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected avro records in the file.
 */
public static void assertReadAvroFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                new BufferedInputStream(fs.open(new Path(path))), new GenericDatumReader<GenericRecord>())) {
            IndexedRecord record = null;
            while (reader.hasNext()) {
                record = reader.iterator().next();
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadAvroFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
From source file: org.talend.components.test.MiniDfsResource.java
License: Open Source License
/**
 * Tests that a file on the HDFS cluster contains the given parquet.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected avro records in the file.
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(),
                new Path(path))) {
            IndexedRecord record = null;
            while (null != (record = reader.read())) {
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadParquetFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
From source file: org.trustedanalytics.resourceserver.data.InputStreamProvider.java
License: Apache License
/**
 * Gets an InputStream for a path on HDFS.
 *
 * If the given path is a directory, it will read the files inside that dir and create
 * a SequenceInputStream from them, which emulates reading from the directory just like from
 * a regular file. Notice that this method is not meant to read huge datasets
 * (nor is the whole project).
 *
 * @param path
 * @return
 * @throws IOException
 */
public InputStream getInputStream(Path path) throws IOException {
    Objects.requireNonNull(path);
    FileSystem fs = hdfsConfig.getFileSystem();
    if (fs.isFile(path)) {
        return fs.open(path);
    } else if (fs.isDirectory(path)) {
        FileStatus[] files = fs.listStatus(path);
        List<InputStream> paths = Arrays.stream(files).map(f -> {
            try {
                return fs.open(f.getPath());
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE, "Cannot read file " + f.getPath().toString(), e);
                return null;
            }
        }).filter(f -> f != null).collect(Collectors.toList());
        return new SequenceInputStream(Collections.enumeration(paths));
    } else {
        throw new IllegalArgumentException("Given path " + path.toString() + " is neither file nor directory");
    }
}
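For context, a hypothetical caller of a helper like the one above might consume the combined stream as follows. The method name, the provider parameter, and the path are illustrative assumptions rather than part of the source file, and, as the javadoc above warns, this pattern is only suitable for small datasets.

// Hypothetical usage sketch (illustrative only): read a small HDFS directory,
// or a single file, into a String via the provider shown above.
// Assumed imports: java.io.*, java.nio.charset.StandardCharsets, org.apache.hadoop.fs.Path.
static String readSmallDataset(InputStreamProvider provider) throws IOException {
    try (InputStream in = provider.getInputStream(new Path("/data/small-output"))) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int n;
        while ((n = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, n);
        }
        return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
    }
}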